Class: Puree::XMLExtractor::Dataset

Inherits:
Resource show all
Includes:
AssociatedMixin
Defined in:
lib/puree/xml_extractor/dataset.rb

Overview

Dataset XML extractor.

Instance Method Summary collapse

Methods included from AssociatedMixin

#associated

Methods inherited from Resource

#created, #get_data?, #locale, #modified, #uuid, #xpath_query

Methods inherited from Base

#xpath_query_for_multi_value, #xpath_query_for_single_value

Constructor Details

#initialize(xml:) ⇒ Dataset

Returns a new instance of Dataset.



10
11
12
13
# File 'lib/puree/xml_extractor/dataset.rb', line 10

# Builds a dataset extractor and marks its resource type as :dataset.
#
# @param xml forwarded unchanged to the superclass initializer
def initialize(xml:)
  super(xml: xml)
  @resource_type = :dataset
end

Instance Method Details

#access ⇒ String?

Open access permission

Returns:

  • (String, nil)


17
18
19
# File 'lib/puree/xml_extractor/dataset.rb', line 17

# Open access permission, read as a single value from the
# openAccessPermission term.
#
# @return [String, nil]
def access
  permission_path = '/openAccessPermission/term/localizedString'
  xpath_query_for_single_value(permission_path)
end

#available ⇒ Time?

Date made available

Returns:

  • (Time, nil)


23
24
25
# File 'lib/puree/xml_extractor/dataset.rb', line 23

# Date made available.
#
# Looks up the 'dateMadeAvailable' temporal date and hands the result to
# Puree::Util::Date.hash_to_time for conversion.
#
# @return [Time, nil]
def available
  Puree::Util::Date.hash_to_time(temporal_date('dateMadeAvailable'))
end

#description ⇒ String?

Returns:

  • (String, nil)


28
29
30
# File 'lib/puree/xml_extractor/dataset.rb', line 28

# Description text, read as a single value from the classification-defined
# description field.
#
# @return [String, nil]
def description
  description_path = '/descriptions/classificationDefinedField/value/localizedString'
  xpath_query_for_single_value(description_path)
end

#doi ⇒ String?

Digital Object Identifier

Returns:

  • (String, nil)


34
35
36
# File 'lib/puree/xml_extractor/dataset.rb', line 34

# Digital Object Identifier, read as a single value from the doi element.
#
# @return [String, nil]
def doi
  xpath_query_for_single_value('/doi')
end

#files ⇒ Array<Puree::Model::File>

Supporting files

Returns:



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/puree/xml_extractor/dataset.rb', line 40

# Supporting files.
#
# Builds one Puree::Model::File per document element (name, MIME type, size
# as an Integer, URL). A copyright license is attached only when a
# documentLicense element exists and the license model reports data.
# Entries sharing a URL are collapsed to the first occurrence.
#
# The createdDate, visibleOnPortalDate and limitedVisibility elements are
# not read here.
#
# @return [Array<Puree::Model::File>]
def files
  extracted = xpath_query('/documents/document').map do |node|
    file = Puree::Model::File.new
    file.name = node.xpath('fileName').text.strip
    file.mime = node.xpath('mimeType').text.strip
    file.size = node.xpath('size').text.strip.to_i
    file.url = node.xpath('url').text.strip

    license_nodes = node.xpath('documentLicense')
    unless license_nodes.empty?
      copyright = Puree::Model::CopyrightLicense.new
      copyright.name = license_nodes.xpath('term/localizedString').text.strip
      copyright.url = license_nodes.xpath('description/localizedString').text.strip
      # Skip licenses that carry no usable data.
      file.license = copyright if copyright.data?
    end

    file
  end
  extracted.uniq(&:url)
end

#keywords ⇒ Array<String>

Returns:

  • (Array<String>)


65
66
67
68
69
# File 'lib/puree/xml_extractor/dataset.rb', line 65

# Free keywords, whitespace-stripped and de-duplicated.
#
# @return [Array<String>]
def keywords
  keyword_nodes = xpath_query('/keywordGroups/keywordGroup/keyword/userDefinedKeyword/freeKeyword')
  keyword_nodes.map { |node| node.text.strip }.uniq
end

#legal_conditions ⇒ Array<Puree::Model::LegalCondition>

Returns:



72
73
74
75
76
77
78
79
80
81
82
# File 'lib/puree/xml_extractor/dataset.rb', line 72

# Legal conditions attached to the dataset.
#
# Each legalCondition element becomes a Puree::Model::LegalCondition with a
# name (from the type classification term) and a description. Entries
# sharing a name are collapsed to the first occurrence.
#
# @return [Array<Puree::Model::LegalCondition>]
def legal_conditions
  conditions = xpath_query('/legalConditions/legalCondition').map do |node|
    condition = Puree::Model::LegalCondition.new
    condition.name = node.xpath('typeClassification/term/localizedString').text.strip
    condition.description = node.xpath('description').text.strip
    condition
  end
  conditions.uniq(&:name)
end

#links ⇒ Array<Puree::Model::Link>

Returns:



85
86
87
88
89
90
91
92
93
94
95
# File 'lib/puree/xml_extractor/dataset.rb', line 85

# Links attached to the dataset.
#
# Each link element becomes a Puree::Model::Link with a description and a
# URL. Entries sharing a URL are collapsed to the first occurrence.
#
# @return [Array<Puree::Model::Link>]
def links
  collected = xpath_query('/links/link').map do |node|
    link = Puree::Model::Link.new
    link.description = node.xpath('description').text.strip
    link.url = node.xpath('url').text.strip
    link
  end
  collected.uniq(&:url)
end

#organisations ⇒ Array<Puree::Model::OrganisationHeader>



98
99
100
101
# File 'lib/puree/xml_extractor/dataset.rb', line 98

# Organisations associated with the dataset.
#
# Queries the organisation elements and delegates model construction to
# Puree::XMLExtractor::Shared.organisation_multi_header.
#
# @return [Array<Puree::Model::OrganisationHeader>]
def organisations
  organisation_nodes = xpath_query('/organisations/organisation')
  Puree::XMLExtractor::Shared.organisation_multi_header(organisation_nodes)
end

#owner ⇒ Puree::Model::OrganisationHeader?



104
105
106
107
# File 'lib/puree/xml_extractor/dataset.rb', line 104

# Managing organisation.
#
# Queries the managedBy element and delegates model construction to
# Puree::XMLExtractor::Shared.organisation_header.
#
# @return [Puree::Model::OrganisationHeader, nil]
def owner
  managing_node = xpath_query('/managedBy')
  Puree::XMLExtractor::Shared.organisation_header(managing_node)
end

#persons_external ⇒ Array<Puree::Model::EndeavourPerson>

Returns:



115
116
117
# File 'lib/puree/xml_extractor/dataset.rb', line 115

# Persons obtained by calling #persons with the 'external' category.
#
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_external
  persons('external')
end

#persons_internal ⇒ Array<Puree::Model::EndeavourPerson>

Returns:



110
111
112
# File 'lib/puree/xml_extractor/dataset.rb', line 110

# Persons obtained by calling #persons with the 'internal' category.
#
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_internal
  persons('internal')
end

#persons_other ⇒ Array<Puree::Model::EndeavourPerson>

Returns:



120
121
122
# File 'lib/puree/xml_extractor/dataset.rb', line 120

# Persons obtained by calling #persons with the 'other' category.
#
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_other
  persons('other')
end

#production ⇒ Puree::Model::TemporalRange?

Date of data production

Returns:



126
127
128
# File 'lib/puree/xml_extractor/dataset.rb', line 126

# Date of data production.
#
# Delegates to #temporal_range with the production start and end element
# names.
#
# @return [Puree::Model::TemporalRange, nil]
def production
  temporal_range('dateOfDataProduction', 'endDateOfDataProduction')
end

#projects ⇒ Array<Puree::Model::RelatedContentHeader>



131
132
133
# File 'lib/puree/xml_extractor/dataset.rb', line 131

# Associated content of type 'Research', with duplicates removed.
#
# @return [Array<Puree::Model::RelatedContentHeader>]
def projects
  research_items = associated_type('Research')
  research_items.uniq
end

#publications ⇒ Array<Puree::Model::RelatedContentHeader>



136
137
138
139
140
141
142
143
144
# File 'lib/puree/xml_extractor/dataset.rb', line 136

# Associated content whose type is anything other than 'Research'.
# Order is preserved and duplicates are not removed.
#
# @return [Array<Puree::Model::RelatedContentHeader>]
def publications
  associated.select { |item| item.type != 'Research' }
end

#publisher ⇒ String?

Returns:

  • (String, nil)


147
148
149
# File 'lib/puree/xml_extractor/dataset.rb', line 147

# Publisher name, read as a single value from the publisher element.
#
# @return [String, nil]
def publisher
  xpath_query_for_single_value('/publisher/name')
end

#spatial_places ⇒ Array<String>

Returns:

  • (Array<String>)


152
153
154
155
156
157
158
159
160
# File 'lib/puree/xml_extractor/dataset.rb', line 152

# Geographical coverage entries, whitespace-stripped and de-duplicated.
# The underlying data comes from a free-form text box.
#
# @return [Array<String>]
def spatial_places
  coverage_nodes = xpath_query('/geographicalCoverage/localizedString')
  coverage_nodes.map { |node| node.text.strip }.uniq
end

#spatial_point ⇒ Puree::Model::SpatialPoint?

Spatial coverage point

Returns:



164
165
166
167
168
169
170
171
172
173
174
# File 'lib/puree/xml_extractor/dataset.rb', line 164

# Spatial coverage point.
#
# Reads the geoLocation point text, expected in the form
# "latitude,longitude", and converts both parts to Floats.
#
# Earlier revisions ended with an unconditional `nil`, so the populated
# point was never returned; the point is now returned when one is found.
#
# @return [Puree::Model::SpatialPoint, nil] the point, or nil when there is
#   no point element or its text lacks two comma-separated parts
def spatial_point
  xpath_result = xpath_query '/geoLocation/point'
  return nil if xpath_result[0].nil?

  latitude, longitude = xpath_result.text.split(',').map(&:strip)
  # A value without both coordinates cannot form a point.
  return nil if latitude.nil? || longitude.nil?

  point = Puree::Model::SpatialPoint.new
  point.latitude = latitude.to_f
  point.longitude = longitude.to_f
  point
end

#temporal ⇒ Puree::Model::TemporalRange?

Temporal coverage

Returns:



183
184
185
# File 'lib/puree/xml_extractor/dataset.rb', line 183

# Temporal coverage.
#
# Delegates to #temporal_range with the coverage start and end element
# names.
#
# @return [Puree::Model::TemporalRange, nil]
def temporal
  temporal_range('temporalCoverageStartDate', 'temporalCoverageEndDate')
end

#title ⇒ String?

Returns:

  • (String, nil)


188
189
190
# File 'lib/puree/xml_extractor/dataset.rb', line 188

# Dataset title, read as a single value from the title element.
#
# @return [String, nil]
def title
  xpath_query_for_single_value('/title/localizedString')
end