Class: Puree::XMLExtractor::Dataset

Inherits:
Resource
Includes:
AssociatedMixin, WorkflowStateMixin
Defined in:
lib/puree/xml_extractor/dataset.rb

Overview

Dataset XML extractor.

Instance Method Summary

Methods included from WorkflowStateMixin

#workflow_state

Methods included from AssociatedMixin

#associated

Methods inherited from Resource

#created, #get_data?, #locale, #modified, #uuid, #xpath_query

Methods inherited from Base

#xpath_query_for_multi_value, #xpath_query_for_single_value

Constructor Details

#initialize(xml:) ⇒ Dataset

Returns a new instance of Dataset.



11
12
13
14
# File 'lib/puree/xml_extractor/dataset.rb', line 11

# Builds a Dataset extractor and marks its resource type as :dataset.
#
# @param xml forwarded as-is to the superclass initializer
def initialize(xml:)
  super(xml: xml)
  @resource_type = :dataset
end

Instance Method Details

#access ⇒ String?

Open access permission

Returns:

  • (String, nil)


18
19
20
# File 'lib/puree/xml_extractor/dataset.rb', line 18

# Open access permission.
#
# @return [String, nil]
def access
  path = '/openAccessPermission/term/localizedString'
  xpath_query_for_single_value(path)
end

#available ⇒ Time?

Date made available

Returns:

  • (Time, nil)


24
25
26
# File 'lib/puree/xml_extractor/dataset.rb', line 24

# Date made available.
#
# Looks up the 'dateMadeAvailable' temporal date and hands it to
# Puree::Util::Date.hash_to_time for conversion.
#
# @return [Time, nil]
def available
  date_parts = temporal_date('dateMadeAvailable')
  Puree::Util::Date.hash_to_time(date_parts)
end

#description ⇒ String?

Returns:

  • (String, nil)


29
30
31
# File 'lib/puree/xml_extractor/dataset.rb', line 29

# Single-value lookup of the dataset description.
#
# @return [String, nil]
def description
  path = '/descriptions/classificationDefinedField/value/localizedString'
  xpath_query_for_single_value(path)
end

#doi ⇒ String?

Digital Object Identifier

Returns:

  • (String, nil)


35
36
37
# File 'lib/puree/xml_extractor/dataset.rb', line 35

# Digital Object Identifier.
#
# @return [String, nil]
def doi
  path = '/doi'
  xpath_query_for_single_value(path)
end

#files ⇒ Array<Puree::Model::File>

Supporting files

Returns:



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/puree/xml_extractor/dataset.rb', line 41

# Supporting files.
#
# Builds one Puree::Model::File per '/documents/document' node (name, mime,
# size, url) and attaches a Puree::Model::CopyrightLicense when the node has
# a 'documentLicense' element whose model reports data. The createdDate,
# visibleOnPortalDate and limitedVisibility elements are not extracted.
#
# @return [Array<Puree::Model::File>] files, de-duplicated by url
def files
  extracted = xpath_query('/documents/document').map do |node|
    file = Puree::Model::File.new
    file.name = node.xpath('fileName').text.strip
    file.mime = node.xpath('mimeType').text.strip
    file.size = node.xpath('size').text.strip.to_i
    file.url = node.xpath('url').text.strip

    licence_node = node.xpath('documentLicense')
    unless licence_node.empty?
      licence = Puree::Model::CopyrightLicense.new
      licence.name = licence_node.xpath('term/localizedString').text.strip
      licence.url = licence_node.xpath('description/localizedString').text.strip
      # Only keep the licence when the model says it actually holds data.
      file.license = licence if licence.data?
    end

    file
  end
  extracted.uniq(&:url)
end

#keywords ⇒ Array<String>

Returns:

  • (Array<String>)


66
67
68
69
70
# File 'lib/puree/xml_extractor/dataset.rb', line 66

# Free keywords attached to the dataset, stripped of surrounding whitespace
# and de-duplicated.
#
# @return [Array<String>]
def keywords
  xpath_query('/keywordGroups/keywordGroup/keyword/userDefinedKeyword/freeKeyword')
    .map { |node| node.text.strip }
    .uniq
end

#legal_conditions ⇒ Array<Puree::Model::LegalCondition>

Returns:

  • (Array<Puree::Model::LegalCondition>)



73
74
75
76
77
78
79
80
81
82
83
# File 'lib/puree/xml_extractor/dataset.rb', line 73

# Legal conditions recorded under '/legalConditions/legalCondition'.
#
# @return [Array<Puree::Model::LegalCondition>] conditions, de-duplicated by name
def legal_conditions
  conditions = xpath_query('/legalConditions/legalCondition').map do |node|
    Puree::Model::LegalCondition.new.tap do |condition|
      condition.name = node.xpath('typeClassification/term/localizedString').text.strip
      condition.description = node.xpath('description').text.strip
    end
  end
  conditions.uniq(&:name)
end

#links ⇒ Array<Puree::Model::Link>

Returns:

  • (Array<Puree::Model::Link>)



86
87
88
89
90
91
92
93
94
95
96
# File 'lib/puree/xml_extractor/dataset.rb', line 86

# Links recorded under '/links/link'.
#
# @return [Array<Puree::Model::Link>] links, de-duplicated by url
def links
  found = xpath_query('/links/link').map do |node|
    Puree::Model::Link.new.tap do |link|
      link.description = node.xpath('description').text.strip
      link.url = node.xpath('url').text.strip
    end
  end
  found.uniq(&:url)
end

#organisations ⇒ Array<Puree::Model::OrganisationHeader>



99
100
101
102
# File 'lib/puree/xml_extractor/dataset.rb', line 99

# Organisations found under '/organisations/organisation', converted by
# Puree::XMLExtractor::Shared.organisation_multi_header.
#
# @return [Array<Puree::Model::OrganisationHeader>]
def organisations
  Puree::XMLExtractor::Shared.organisation_multi_header(
    xpath_query('/organisations/organisation')
  )
end

#owner ⇒ Puree::Model::OrganisationHeader?



105
106
107
108
# File 'lib/puree/xml_extractor/dataset.rb', line 105

# Managing organisation, read from '/managedBy' and converted by
# Puree::XMLExtractor::Shared.organisation_header.
#
# @return [Puree::Model::OrganisationHeader, nil]
def owner
  Puree::XMLExtractor::Shared.organisation_header(
    xpath_query('/managedBy')
  )
end

#persons_external ⇒ Array<Puree::Model::EndeavourPerson>

Returns:



116
117
118
# File 'lib/puree/xml_extractor/dataset.rb', line 116

# Persons selected with the 'external' type.
#
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_external
  person_type = 'external'
  persons(person_type)
end

#persons_internal ⇒ Array<Puree::Model::EndeavourPerson>

Returns:



111
112
113
# File 'lib/puree/xml_extractor/dataset.rb', line 111

# Persons selected with the 'internal' type.
#
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_internal
  person_type = 'internal'
  persons(person_type)
end

#persons_other ⇒ Array<Puree::Model::EndeavourPerson>

Returns:



121
122
123
# File 'lib/puree/xml_extractor/dataset.rb', line 121

# Persons selected with the 'other' type.
#
# @return [Array<Puree::Model::EndeavourPerson>]
def persons_other
  person_type = 'other'
  persons(person_type)
end

#production ⇒ Puree::Model::TemporalRange?

Date of data production

Returns:



127
128
129
# File 'lib/puree/xml_extractor/dataset.rb', line 127

# Date of data production, as the range between the 'dateOfDataProduction'
# and 'endDateOfDataProduction' elements.
#
# @return [Puree::Model::TemporalRange, nil]
def production
  start_element = 'dateOfDataProduction'
  end_element = 'endDateOfDataProduction'
  temporal_range(start_element, end_element)
end

#projects ⇒ Array<Puree::Model::RelatedContentHeader>



132
133
134
# File 'lib/puree/xml_extractor/dataset.rb', line 132

# Associated items of type 'Research', with duplicates removed.
#
# @return [Array<Puree::Model::RelatedContentHeader>]
def projects
  research_items = associated_type('Research')
  research_items.uniq
end

#publications ⇒ Array<Puree::Model::RelatedContentHeader>



137
138
139
140
141
142
143
144
145
# File 'lib/puree/xml_extractor/dataset.rb', line 137

# Associated items whose type is anything other than 'Research'.
# Duplicates are not removed.
#
# @return [Array<Puree::Model::RelatedContentHeader>]
def publications
  associated.select { |item| item.type != 'Research' }
end

#publisher ⇒ String?

Returns:

  • (String, nil)


148
149
150
# File 'lib/puree/xml_extractor/dataset.rb', line 148

# Single-value lookup of the publisher name.
#
# @return [String, nil]
def publisher
  path = '/publisher/name'
  xpath_query_for_single_value(path)
end

#spatial_places ⇒ Array<String>

Returns:

  • (Array<String>)


153
154
155
156
157
158
159
160
161
# File 'lib/puree/xml_extractor/dataset.rb', line 153

# Geographical coverage entries (data from a free-form text box), stripped
# of surrounding whitespace and de-duplicated.
#
# @return [Array<String>]
def spatial_places
  places = xpath_query('/geographicalCoverage/localizedString').map do |node|
    node.text.strip
  end
  places.uniq
end

#spatial_point ⇒ Puree::Model::SpatialPoint?

Spatial coverage point

Returns:



165
166
167
168
169
170
171
172
173
174
175
# File 'lib/puree/xml_extractor/dataset.rb', line 165

# Spatial coverage point.
#
# Reads the text under '/geoLocation/point' and splits it on ',' into a
# latitude part followed by a longitude part.
#
# @return [Puree::Model::SpatialPoint, nil] the point, or nil when no point
#   node is present or the text does not contain both parts
def spatial_point
  xpath_result = xpath_query '/geoLocation/point'
  return nil if xpath_result[0].nil?

  latitude, longitude = xpath_result.text.split(',')
  # Without both components there is no usable coordinate.
  return nil if latitude.nil? || longitude.nil?

  point = Puree::Model::SpatialPoint.new
  point.latitude = latitude.strip.to_f
  point.longitude = longitude.strip.to_f
  # The populated model has to be the final expression; a trailing nil here
  # would discard it and make the method always return nil.
  point
end

#temporal ⇒ Puree::Model::TemporalRange?

Temporal coverage

Returns:



179
180
181
# File 'lib/puree/xml_extractor/dataset.rb', line 179

# Temporal coverage, as the range between the 'temporalCoverageStartDate'
# and 'temporalCoverageEndDate' elements.
#
# @return [Puree::Model::TemporalRange, nil]
def temporal
  start_element = 'temporalCoverageStartDate'
  end_element = 'temporalCoverageEndDate'
  temporal_range(start_element, end_element)
end

#title ⇒ String?

Returns:

  • (String, nil)


184
185
186
# File 'lib/puree/xml_extractor/dataset.rb', line 184

# Single-value lookup of the dataset title.
#
# @return [String, nil]
def title
  path = '/title/localizedString'
  xpath_query_for_single_value(path)
end