Class: TaxPub

Inherits:
Object
  • Object
show all
Defined in:
lib/taxpub.rb,
lib/taxpub/utils.rb,
lib/taxpub/version.rb,
lib/taxpub/reference.rb,
lib/taxpub/validator.rb,
lib/taxpub/exceptions.rb

Defined Under Namespace

Classes: Error, InvalidParameterValueError, InvalidTypeError, Reference, Utils, Validator

Constant Summary collapse

VERSION =
"0.1.0"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ TaxPub

Returns a new instance of TaxPub.



13
14
15
16
# File 'lib/taxpub.rb', line 13

# Start with nothing configured: the parameter store and the document
# slot are two separate empty hashes until a source is set and parsed.
def initialize
  @parameters, @doc = {}, {}
end

Class Method Details

.version ⇒ Object



4
5
6
# File 'lib/taxpub/version.rb', line 4

# Report the library version.
#
# Returns the VERSION constant unchanged.
def self.version
  VERSION
end

Instance Method Details

#abstract ⇒ Object

Get the abstract



114
115
116
117
118
119
# File 'lib/taxpub.rb', line 114

# Get the abstract
#
# Collects the text of every node matching //*/article-meta/abstract and
# returns it after it has been passed through Utils.clean_text.
# Validator.validate_nokogiri is called on @doc first (presumably it
# rejects a document that has not been parsed - confirm in Validator).
def abstract
  Validator.validate_nokogiri(@doc)
  Utils.clean_text(@doc.xpath("//*/article-meta/abstract").text)
end

#authors ⇒ Object

Get the authors



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/taxpub.rb', line 134

# Get the authors
#
# Returns an Array with one Hash per contrib[@contrib-type='author']
# element, in document order. Each Hash has the keys:
#   :given        - cleaned text of name/given-names
#   :surname      - cleaned text of name/surname
#   :fullname     - given and surname joined by a single space; a missing
#                   part is skipped so no stray space is produced
#   :email        - text of the email child
#   :affiliations - Array with the cleaned addr-line text of the aff
#                   element whose id equals each xref/@rid of the author
#   :orcid        - text of uri[@content-type='orcid']
def authors
  Validator.validate_nokogiri(@doc)
  @doc.xpath("//*/contrib[@contrib-type='author']").map do |author|
    # One affiliation string per cross-reference id, looked up document-wide.
    affiliations = author.xpath("xref/@rid").map do |rid|
      Utils.clean_text(@doc.xpath("//*/aff[@id='#{rid}']/addr-line").text)
    end
    given = Utils.clean_text(author.xpath("name/given-names").text)
    surname = Utils.clean_text(author.xpath("name/surname").text)
    {
      given: given,
      surname: surname,
      # Drop blank parts before joining: a single-name author would
      # otherwise end up with a leading or trailing space.
      fullname: [given, surname].reject { |part| part.to_s.empty? }.join(" "),
      email: author.xpath("email").text,
      affiliations: affiliations,
      orcid: author.xpath("uri[@content-type='orcid']").text
    }
  end
end

#conference ⇒ Object

Get the conference metadata



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/taxpub.rb', line 173

# Get the conference metadata
#
# Returns {} when the document has no conference element. Otherwise
# returns a Hash with :date, :name, :acronym, :location and :theme (read
# from the conf-date, conf-name, conf-acronym, conf-loc and conf-theme
# children of the conference element), plus :session (subject of the
# 'conference-part' subj-group) and :presenter (paragraph text of the
# 'Presenting author' section). Every value goes through
# Utils.clean_text; a child element that is absent is treated as empty
# text.
def conference
  Validator.validate_nokogiri(@doc)
  conf = @doc.xpath("//*/conference")
  return {} if conf.empty?

  # at_xpath returns nil for a missing child; fall back to empty text so
  # one optional field being absent does not raise NoMethodError.
  field = lambda do |child|
    node = conf.at_xpath(child)
    Utils.clean_text(node ? node.text : "")
  end
  session_xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
  presenter_xpath = "//*/sec[@sec-type='Presenting author']/p"
  {
    date: field.call("conf-date"),
    name: field.call("conf-name"),
    acronym: field.call("conf-acronym"),
    location: field.call("conf-loc"),
    theme: field.call("conf-theme"),
    session: Utils.clean_text(@doc.xpath(session_xpath).text),
    presenter: Utils.clean_text(@doc.xpath(presenter_xpath).text)
  }
end

#content ⇒ Object

Get the raw text content of the Nokogiri document



87
88
89
90
# File 'lib/taxpub.rb', line 87

# Get the raw text content of the Nokogiri document
#
# Takes the text of the whole parsed document and returns it after
# Utils.clean_text has been applied.
def content
  Validator.validate_nokogiri(@doc)
  raw_text = @doc.text
  Utils.clean_text(raw_text)
end

#corresponding_author ⇒ Object

Get the corresponding author



163
164
165
166
167
168
# File 'lib/taxpub.rb', line 163

# Get the corresponding author
#
# Reads the paragraph of the author-notes footnote typed 'corresp',
# cleans it with Utils.clean_text, removes every occurrence of the
# "Corresponding author: " label and strips one trailing period.
def corresponding_author
  Validator.validate_nokogiri(@doc)
  note = @doc.xpath("//*/author-notes/fn[@fn-type='corresp']/p").text
  Utils.clean_text(note).gsub("Corresponding author: ", "").chomp(".")
end

#doc ⇒ Object

View the parsed Nokogiri document



74
75
76
# File 'lib/taxpub.rb', line 74

# View the parsed Nokogiri document
#
# Returns whatever is currently stored in @doc; no validation is done
# here, so this is the empty placeholder until a parse has stored a
# document.
def doc
  @doc
end

#doi ⇒ Object

Get the DOI



95
96
97
98
99
# File 'lib/taxpub.rb', line 95

# Get the DOI
#
# Reads the text of the article-id element whose pub-id-type is 'doi'
# and returns the result of passing it to Utils.expand_doi.
def doi
  Validator.validate_nokogiri(@doc)
  id_nodes = @doc.xpath("//*/article-meta/article-id[@pub-id-type='doi']")
  Utils.expand_doi(id_nodes.text)
end

#figures ⇒ Object

Get the figures



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/taxpub.rb', line 230

# Get the figures
#
# Returns an Array with one Hash per fig element:
#   :label   - cleaned text of the label child
#   :caption - cleaned text of the caption child
#   :graphic - Hash with :href and :id taken from the attributes of the
#              graphic child; each is "" when the figure has no graphic
#              or the attribute is missing
def figures
  Validator.validate_nokogiri(@doc)
  @doc.xpath("//*/fig").map do |fig|
    # Look the graphic up once and reuse it for both attributes.
    graphic = fig.xpath("graphic")
    # A figure without a graphic, or a graphic lacking the attribute,
    # yields "" instead of raising NoMethodError on nil.
    attribute_text = lambda do |name|
      attr = graphic.empty? ? nil : graphic.attribute(name)
      attr ? attr.text : ""
    end
    {
      label: Utils.clean_text(fig.xpath("label").text),
      caption: Utils.clean_text(fig.xpath("caption").text),
      graphic: {
        href: attribute_text.call("href"),
        id: attribute_text.call("id")
      }
    }
  end
end

#file_path ⇒ Object



54
55
56
# File 'lib/taxpub.rb', line 54

# Get the path of the configured TaxPub XML file
#
# Returns the path of the File stored under @parameters[:file], or nil
# when no file has been set. The missing-file case is checked
# explicitly rather than with a blanket inline rescue, so unrelated
# errors are no longer silently turned into nil.
def file_path
  file = @parameters[:file]
  file.path if file
end

#file_path=(file_path) ⇒ Object

Set a file path for a TaxPub XML file

Example

instance.file_path = "/Users/jane/Desktop/taxpub.xml"


49
50
51
52
# File 'lib/taxpub.rb', line 49

# Set a file path for a TaxPub XML file
#
# The path is checked with Validator.validate_type(file_path, 'File')
# and then opened read-only; the resulting File is stored under
# @parameters[:file]. If a file was already set, its handle is closed
# once the new one has been opened, so repeated assignment does not
# accumulate open descriptors.
#
# Example
#
#   instance.file_path = "/Users/jane/Desktop/taxpub.xml"
def file_path=(file_path)
  Validator.validate_type(file_path, 'File')
  replaced = @parameters[:file]
  # Open before closing the old handle: if this raises, the previous
  # setting is left untouched.
  current = File.new(file_path, "r")
  @parameters[:file] = current
  replaced.close if replaced.respond_to?(:closed?) && !replaced.closed?
  current
end

#keywords ⇒ Object

Get the keywords



124
125
126
127
128
129
# File 'lib/taxpub.rb', line 124

# Get the keywords
#
# Returns an Array holding the cleaned text of each kwd element found
# under article-meta/kwd-group, in document order.
def keywords
  Validator.validate_nokogiri(@doc)
  kwd_nodes = @doc.xpath("//*/article-meta/kwd-group/kwd")
  kwd_nodes.map { |kwd| Utils.clean_text(kwd.text) }
end

#occurrences ⇒ Object

Get occurrences with dwc keys



212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/taxpub.rb', line 212

# Get occurrences with dwc keys
#
# Returns an Array with one Hash per list-item of every list whose
# list-content is 'occurrences'. For each named-content element one
# level below the item's children, the content-type attribute with any
# "dwc:" removed becomes a Symbol key and the element text becomes the
# value.
def occurrences
  Validator.validate_nokogiri(@doc)
  items = @doc.xpath("//*/list[@list-content='occurrences']/list-item")
  items.map do |item|
    item.xpath("*/named-content").each_with_object({}) do |term, record|
      key = term.attributes["content-type"].text.gsub(/dwc\:/, "")
      record[key.to_sym] = term.text
    end
  end
end

#params ⇒ Object

View the built parameters



21
22
23
# File 'lib/taxpub.rb', line 21

# View the built parameters
#
# Returns the @parameters object itself (not a copy).
def params
  @parameters
end

#parse ⇒ Object

Build the Nokogiri document



61
62
63
64
65
66
67
68
69
# File 'lib/taxpub.rb', line 61

# Build the Nokogiri document
#
# Parses the remote source when a url is set, otherwise the local file
# when a file_path is set, stores the result in @doc, runs
# Validator.validate_nokogiri on it and returns it. When neither source
# is set, @doc is left as it was and only the validation runs.
#
# The remote source is read with URI.open (from open-uri) in block form:
# since Ruby 3.0 a bare Kernel#open no longer fetches URLs, and the
# block form closes the stream once it has been parsed.
def parse
  if url
    @doc = URI.open(url) { |remote| Nokogiri::XML(remote) }
  elsif file_path
    @doc = File.open(file_path) { |f| Nokogiri::XML(f) }
  end
  Validator.validate_nokogiri(@doc)
  @doc
end

#references ⇒ Object

Get the cited references



250
251
252
253
254
# File 'lib/taxpub.rb', line 250

# Get the cited references
#
# Returns an Array with the result of Reference.parse for each ref
# element found under a ref-list, in document order.
def references
  Validator.validate_nokogiri(@doc)
  ref_nodes = @doc.xpath("//*/ref-list/ref")
  ref_nodes.map { |node| Reference.parse(node) }
end

#scientific_names(hsh = {}) ⇒ Object

Get the taxa

Attributes

  • hsh - Hash { with_ranks: true } for scientific names returned with ranks as keys



200
201
202
203
204
205
206
# File 'lib/taxpub.rb', line 200

# Get the taxa
#
# Attributes
#
#   hsh - Hash; pass { with_ranks: true } to get the names exactly as
#         scientific_names_with_ranks returns them. Otherwise each entry
#         is flattened to a String by joining its values with a space.
def scientific_names(hsh = {})
  keep_ranks = hsh[:with_ranks]
  ranked = scientific_names_with_ranks
  return ranked if keep_ranks

  ranked.map { |name| name.values.join(" ") }
end

#title ⇒ Object

Get the title



104
105
106
107
108
109
# File 'lib/taxpub.rb', line 104

# Get the title
#
# Reads the text of article-meta/title-group/article-title and returns
# it after Utils.clean_text has been applied.
def title
  Validator.validate_nokogiri(@doc)
  Utils.clean_text(@doc.xpath("//*/article-meta/title-group/article-title").text)
end

#type ⇒ Object



78
79
80
81
82
# File 'lib/taxpub.rb', line 78

# Get the article type
#
# Returns the text of the article-type attribute on the root article
# element, exactly as stored (no cleaning is applied).
def type
  Validator.validate_nokogiri(@doc)
  type_attr = @doc.xpath("/article/@article-type")
  type_attr.text
end

#url ⇒ Object



38
39
40
# File 'lib/taxpub.rb', line 38

# Get the configured remote URL
#
# Returns the value stored under @parameters[:url], or nil when nothing
# truthy is stored there.
def url
  remote = @parameters[:url]
  return nil unless remote

  remote
end

#url=(url) ⇒ Object

Specify a remote TaxPub URL. The source must be an XML file.

Example

instance.url = "https://tdwgproceedings.pensoft.net/article/15141/download/xml/"


33
34
35
36
# File 'lib/taxpub.rb', line 33

# Specify a remote TaxPub URL
#
# The value is handed to Validator.validate_url first and is stored
# under @parameters[:url] only if that call returns normally.
#
# Example
#
#   instance.url = "https://tdwgproceedings.pensoft.net/article/15141/download/xml/"
def url=(url)
  Validator.validate_url(url)
  @parameters.store(:url, url)
end