Class: RelatonBipm::RawdataBipmMetrologia::ArticleParser
- Inherits:
-
Object
- Object
- RelatonBipm::RawdataBipmMetrologia::ArticleParser
- Defined in:
- lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb
Constant Summary collapse
- ATTRS =
%i[docid title contributor date copyright abstract relation series extent type doctype link].freeze
Class Method Summary collapse
-
.parse(path) ⇒ RelatonBipm::BipmBibliographicItem
Create new parser and parse document.
Instance Method Summary collapse
-
#affiliation(contrib) ⇒ Array<RelatonBib::Affiliation>
Parse affiliations.
-
#bibitem(date, type) ⇒ RelatonBipm::BipmBibliographicItem
Create bibitem.
-
#create_docid(id, type, primary = nil) ⇒ RelatonBib::DocumentIdentifier
Create document identifier.
- #create_organization(contrib) ⇒ Object
- #create_person(contrib) ⇒ Object
- #date_part(date, type) ⇒ Object
-
#dates {|date, type| ... } ⇒ Array<String, Object>
Parse date.
-
#fullname(name) ⇒ RelatonBib::FullName
Create full name.
-
#initialize(doc, journal, volume, article) ⇒ ArticleParser
constructor
Initialize parser.
-
#journal_title ⇒ String
Parse journal title.
-
#parse ⇒ RelatonBipm::BipmBibliographicItem
Create new document.
-
#parse_abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
- #parse_address(aff) ⇒ Object
- #parse_affiliation(aff) ⇒ Object
-
#parse_contributor ⇒ Array<RelatonBib::Contributor>
Parse contributor.
-
#parse_copyright ⇒ Array<RelatonBib::CopyrightAssociation>
Parse copyright.
-
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date.
- #parse_division(aff) ⇒ Object
-
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse docid.
- #parse_doctype ⇒ Object
-
#parse_extent ⇒ Array<RelatonBib::Extent>
Parse extent.
- #parse_link ⇒ Object
-
#parse_relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
-
#parse_series ⇒ Array<RelatonBib::Series>
Parse series.
-
#parse_title ⇒ Array<RelatonBib::TypedTitleString>
Parse title.
- #parse_type ⇒ Object
-
#volume_issue_article ⇒ String
Join volume, issue and article identifiers.
Constructor Details
#initialize(doc, journal, volume, article) ⇒ ArticleParser
Initialize parser
27 28 29 30 31 32 33 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 27
#
# Initialize parser.
#
# @param doc [Nokogiri::XML::Document] parsed article XML (see `.parse`)
# @param journal [String] journal part taken from the directory name
# @param volume [String] volume part taken from the directory name
# @param article [String] article part taken from the directory name
def initialize(doc, journal, volume, article)
  @journal = journal
  @volume = volume
  @article = article
  # Cache the two nodes every parse_* method starts from.
  @doc = doc.at("/article")
  @meta = doc.at("/article/front/article-meta")
end
Class Method Details
.parse(path) ⇒ RelatonBipm::BipmBibliographicItem
Create new parser and parse document
13 14 15 16 17 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 13
#
# Create new parser and parse document.
#
# The journal, volume and article values are taken from the name of the
# directory containing the file: the name is split on "_" and the first
# segment is discarded.
#
# @param path [String] path to the article XML file
# @return [RelatonBipm::BipmBibliographicItem]
def self.parse(path)
  xml = File.read(path, encoding: "UTF-8")
  doc = Nokogiri::XML(xml)
  directory = path.split("/")[-2]
  journal, volume, article = directory.split("_")[1..]
  parser = new(doc, journal, volume, article)
  parser.parse
end
Instance Method Details
#affiliation(contrib) ⇒ Array<RelatonBib::Affiliation>
Parse affiliations
140 141 142 143 144 145 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 140
#
# Parse affiliations.
#
# Each `xref[@ref-type='aff']` of the contributor is resolved to the `aff`
# element with the matching id inside `contrib-group`. References that do not
# resolve to an element are skipped instead of raising NoMethodError, and
# affiliations that `parse_affiliation` rejects (nil) are dropped.
#
# @param contrib [Nokogiri::XML::Element] contributor element
# @return [Array<RelatonBib::Affiliation>]
def affiliation(contrib)
  contrib.xpath("./xref[@ref-type='aff']").each_with_object([]) do |xref, affiliations|
    aff = @meta.at("./contrib-group/aff[@id='#{xref[:rid]}']")
    next unless aff # dangling reference: nothing to parse

    parsed = parse_affiliation(aff)
    affiliations << parsed if parsed
  end
end
#bibitem(date, type) ⇒ RelatonBipm::BipmBibliographicItem
Create bibitem
308 309 310 311 312 313 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 308
#
# Create bibitem.
#
# The item carries the article title, a single date of the given type and a
# medium whose carrier is "online" for the "epub" type and "print" otherwise.
#
# @param date [String] date string
# @param type [String] date type
# @return [RelatonBipm::BipmBibliographicItem]
def bibitem(date, type)
  bib_date = RelatonBib::BibliographicDate.new(type: type, on: date)
  carrier =
    if type == "epub"
      "online"
    else
      "print"
    end
  medium = RelatonBib::Medium.new(carrier: carrier)
  BipmBibliographicItem.new(title: parse_title, date: [bib_date], medium: medium)
end
#create_docid(id, type, primary = nil) ⇒ RelatonBib::DocumentIdentifier
Create document identifier
90 91 92 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 90
#
# Create document identifier.
#
# @param id [String] identifier text
# @param type [String] identifier type
# @param primary [Boolean, nil] primary flag, passed through unchanged
# @return [RelatonBib::DocumentIdentifier]
def create_docid(id, type, primary = nil)
  attrs = { id: id, type: type, primary: primary }
  RelatonBib::DocumentIdentifier.new(**attrs)
end
#create_organization(contrib) ⇒ Object
129 130 131 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 129
#
# Create an organization named after the contributor's `collab` element.
#
# @param contrib [Nokogiri::XML::Element] contributor element
# @return [RelatonBib::Organization]
def create_organization(contrib)
  collab = contrib.at("./collab")
  RelatonBib::Organization.new(name: collab.text)
end
#create_person(contrib) ⇒ Object
122 123 124 125 126 127 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 122
#
# Create a person from the contributor's `name` element.
#
# @param contrib [Nokogiri::XML::Element] contributor element
# @return [RelatonBib::Person, nil] nil when the contributor has no `name`
def create_person(contrib)
  name_node = contrib.at("./name")
  if name_node
    RelatonBib::Person.new(name: fullname(name_node), affiliation: affiliation(contrib))
  end
end
#date_part(date, type) ⇒ Object
250 251 252 253 254 255 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 250
#
# Read one component of a date element as a string of at least two characters.
#
# @param date [Nokogiri::XML::Element] date element
# @param type [String] name of the child element to read (e.g. "month", "day")
# @return [String] child text left-padded with "0" to two characters;
#   "01" when the child is missing or empty
def date_part(date, type)
  node = date.at("./#{type}")
  value = node ? node.text : nil
  value.nil? || value.empty? ? "01" : value.rjust(2, "0")
end
#dates {|date, type| ... } ⇒ Array<String, Object>
Parse date
241 242 243 244 245 246 247 248 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 241
#
# Parse date.
#
# Builds a "year-month-day" string for every `pub-date` element; month and day
# come from `date_part`.
#
# @yield [date, type] when a block is given, its result replaces the date string
# @yieldparam date [String] date string
# @yieldparam type [String] value of the element's `pub-type` attribute
# @return [Array<String, Object>] date strings, or the block results
def dates
  @meta.xpath("./pub-date").map do |pub_date|
    month = date_part(pub_date, "month")
    day = date_part(pub_date, "day")
    iso_date = [pub_date.at('./year').text, month, day].join("-")
    if block_given?
      yield iso_date, pub_date[:"pub-type"]
    else
      iso_date
    end
  end
end
#fullname(name) ⇒ RelatonBib::FullName
Create full name
196 197 198 199 200 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 196
#
# Create full name.
#
# The complete name is the given names followed by the surname, separated by a
# space; whichever element is absent is left out.
#
# @param name [Nokogiri::XML::Element] `name` element
# @return [RelatonBib::FullName]
def fullname(name)
  parts = %w[./given-names ./surname].map { |query| name.at(query) }.compact
  joined = parts.map(&:text).join(" ")
  RelatonBib::FullName.new(
    completename: RelatonBib::LocalizedString.new(joined, "en", "Latn"),
  )
end
#journal_title ⇒ String
Parse journal title
77 78 79 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 77
#
# Parse journal title.
#
# @return [String] text of the `journal-title` element in the journal metadata
def journal_title
  title_node = @doc.at("./front/journal-meta/journal-title-group/journal-title")
  title_node.text
end
#parse ⇒ RelatonBipm::BipmBibliographicItem
Create new document
40 41 42 43 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 40
#
# Create new document.
#
# Calls `parse_<attr>` for every name in ATTRS and passes the results to the
# bibliographic item as keyword arguments.
#
# @return [RelatonBipm::BipmBibliographicItem]
def parse
  attrs = ATTRS.each_with_object({}) do |attr, collected|
    collected[attr] = send("parse_#{attr}")
  end
  BipmBibliographicItem.new(**attrs)
end
#parse_abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract
281 282 283 284 285 286 287 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 281
#
# Parse abstract.
#
# Each `abstract` element becomes an HTML-formatted string whose language is
# taken from the element's `xml:lang` attribute.
#
# @return [Array<RelatonBib::FormattedString>]
def parse_abstract
  @meta.xpath("./abstract").map do |abstract|
    attrs = {
      content: abstract.inner_html,
      language: abstract[:"xml:lang"],
      script: ["Latn"],
      format: "text/html",
    }
    RelatonBib::FormattedString.new(**attrs)
  end
end
#parse_address(aff) ⇒ Object
177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 177
#
# Build the address of an affiliation.
#
# The address is the text that follows the `institution` element (with leading
# and trailing non-word characters stripped per line) and the text of the
# `country` element, joined by ", ". Empty parts are left out.
#
# @param aff [Nokogiri::XML::Element] `aff` element
# @return [Array<RelatonBib::Address>] one address, or an empty array when
#   there is nothing to report
def parse_address(aff)
  locality = aff.xpath("text()[preceding-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
  country = aff.at('country')

  parts = []
  parts << locality unless locality.empty?
  parts << country.text if country && !country.text.empty?
  return [] if parts.empty?

  [RelatonBib::Address.new(formatted_address: parts.join(", "))]
end
#parse_affiliation(aff) ⇒ Object
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 147
#
# Build an affiliation from an `aff` element.
#
# The element's text, `sup` and `sub` children are serialized, split on commas,
# trimmed, re-joined with ", " and HTML-unescaped. A fixed list of texts is
# rejected (presumably footnotes and data glitches rather than real
# affiliations -- confirm against the data set). When an `institution` child
# exists it supplies the organization name, and the surrounding text supplies
# the subdivision and contact; otherwise the whole text is the name.
#
# @param aff [Nokogiri::XML::Element] `aff` element
# @return [RelatonBib::Affiliation, nil] nil when the entry is rejected
def parse_affiliation(aff)
  fragments = aff.xpath("text()|sup|sub").to_xml.split(",").map(&:strip).reject(&:empty?)
  text = CGI::unescapeHTML(fragments.join(", "))

  rejected = text.include?("Permanent address:") ||
    text == "Germany" ||
    text.start_with?("Guest", "Deceased") ||
    text.include?("Author to whom any correspondence should be addressed")
  return if rejected

  institution = aff.at('institution')
  if institution.nil?
    args = { name: [RelatonBib::LocalizedString.new(text)] }
  else
    name = institution.text
    return if name == "1005 Southover Lane"

    args = {
      subdivision: parse_division(aff),
      contact: parse_address(aff),
      name: [RelatonBib::LocalizedString.new(name)],
    }
  end

  RelatonBib::Affiliation.new(organization: RelatonBib::Organization.new(**args))
end
#parse_contributor ⇒ Array<RelatonBib::Contributor>
Parse contributor
115 116 117 118 119 120 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 115
#
# Parse contributor.
#
# Every `contrib` element becomes a person when it has a `name`, otherwise an
# organization. The role type is the element's `contrib-type` attribute.
#
# @return [Array<RelatonBib::Contributor>]
def parse_contributor
  @meta.xpath("./contrib-group/contrib").map do |contrib|
    entity = create_person(contrib)
    entity ||= create_organization(contrib)
    role = [{ type: contrib[:"contrib-type"] }]
    RelatonBib::ContributionInfo.new(entity: entity, role: role)
  end
end
#parse_copyright ⇒ Array<RelatonBib::CopyrightAssociation>
Parse copyright
262 263 264 265 266 267 268 269 270 271 272 273 274 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 262
#
# Parse copyright.
#
# For every `permissions` element that has a `copyright-year`, the
# `copyright-statement` is split on " & " and the first run of
# space-separated ASCII words in each part becomes an owner organization.
#
# The word pattern uses `[A-Za-z]`. The previous `[A-z]` range also matched
# the punctuation between "Z" and "a" (`[`, `\`, `]`, `^`, `_` and backtick),
# which could leak into owner names.
#
# @return [Array<RelatonBib::CopyrightAssociation>]
def parse_copyright
  @meta.xpath("./permissions").each_with_object([]) do |permissions, copyrights|
    from = permissions.at("./copyright-year")
    next unless from # no year, no copyright association

    holders = permissions.at("./copyright-statement").text.split(" & ")
    owner = holders.map do |holder|
      name = holder[/[A-Za-z]+(?:\s[A-Za-z]+)*/]
      org = RelatonBib::Organization.new(name: name)
      RelatonBib::ContributionInfo.new(entity: org)
    end
    copyrights << RelatonBib::CopyrightAssociation.new(owner: owner, from: from.text)
  end
end
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date
229 230 231 232 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 229
#
# Parse date.
#
# The published date is the smallest of the date strings returned by `dates`.
#
# @return [Array<RelatonBib::BibliographicDate>] a single "published" date
def parse_date
  earliest = dates.min
  published = RelatonBib::BibliographicDate.new(type: "published", on: earliest)
  [published]
end
#parse_division(aff) ⇒ Object
170 171 172 173 174 175 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 170
#
# Build the subdivision of an affiliation from the text that precedes the
# `institution` element, with leading and trailing non-word characters
# stripped per line.
#
# @param aff [Nokogiri::XML::Element] `aff` element
# @return [Array<RelatonBib::LocalizedString>] one string, or an empty array
#   when no such text is left
def parse_division(aff)
  division = aff.xpath("text()[following-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
  division.empty? ? [] : [RelatonBib::LocalizedString.new(division)]
end
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse docid
50 51 52 53 54 55 56 57 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 50
#
# Parse docid.
#
# The first identifier is the primary "BIPM" one, made of the journal title and
# the volume/issue/article string. It is followed by one identifier for every
# DOI `article-id`, typed with the element's `pub-id-type` attribute.
#
# @return [Array<RelatonBib::DocumentIdentifier>]
def parse_docid
  primary = create_docid("#{journal_title} #{volume_issue_article}", "BIPM", true)
  dois = @meta.xpath("./article-id[@pub-id-type='doi']").map do |node|
    create_docid(node.text, node["pub-id-type"])
  end
  [primary, *dois]
end
#parse_doctype ⇒ Object
352 353 354 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 352
#
# Document type of the parsed item.
#
# @return [DocumentType] always of type "article"
def parse_doctype
  DocumentType.new(type: "article")
end
#parse_extent ⇒ Array<RelatonBib::Extent>
Parse extent
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 332
#
# Parse extent.
#
# `volume` and `issue` elements become localities of the same name. `fpage`
# becomes a "page" locality that runs to the text of `lpage`, when present.
#
# @return [Array<RelatonBib::Extent>] a single extent holding all localities
def parse_extent
  localities = @meta.xpath("./volume|./issue|./fpage").map do |node|
    if node.name == "fpage"
      last_page = @meta.at("./lpage")&.text
      RelatonBib::Locality.new("page", node.text, last_page)
    else
      RelatonBib::Locality.new(node.name, node.text, nil)
    end
  end
  [RelatonBib::Extent.new(localities)]
end
#parse_link ⇒ Object
356 357 358 359 360 361 362 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 356
#
# Build links from the DOI article ids.
#
# Every DOI yields two URIs with the same https://doi.org/ URL: first one of
# type "src", then one of type "doi".
#
# @return [Array<RelatonBib::TypedUri>]
def parse_link
  @meta.xpath("./article-id[@pub-id-type='doi']").flat_map do |doi|
    url = "https://doi.org/#{doi.text}"
    %w[src doi].map { |type| RelatonBib::TypedUri.new(content: url, type: type) }
  end
end
#parse_relation ⇒ Array<RelatonBib::DocumentRelation>
Parese relation
294 295 296 297 298 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 294
#
# Parse relation.
#
# Every publication date yields a "hasManifestation" relation to a bibitem
# built from that date and its type.
#
# @return [Array<RelatonBib::DocumentRelation>]
def parse_relation
  dates do |date, pub_type|
    manifestation = bibitem(date, pub_type)
    RelatonBib::DocumentRelation.new(type: "hasManifestation", bibitem: manifestation)
  end
end
#parse_series ⇒ Array<RelatonBib::Series>
Parse series
320 321 322 323 324 325 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 320
#
# Parse series.
#
# The series title is the journal title, tagged as English in Latin script.
#
# @return [Array<RelatonBib::Series>] a single series
def parse_series
  title_attrs = { content: journal_title, language: ["en"], script: ["Latn"] }
  series_title = RelatonBib::TypedTitleString.new(**title_attrs)
  [RelatonBib::Series.new(title: series_title)]
end
#parse_title ⇒ Array<RelatonBib::TypedTitleString>
Parse title
99 100 101 102 103 104 105 106 107 108 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 99
#
# Parse title.
#
# `article-title` elements with empty text are skipped. A title is
# "text/plain" when HTML-escaping its inner HTML changes nothing, and
# "text/html" otherwise.
#
# @return [Array<RelatonBib::TypedTitleString>]
def parse_title
  @meta.xpath("./title-group/article-title").each_with_object([]) do |node, titles|
    next if node.text.empty?

    html = node.inner_html
    format = CGI.escapeHTML(html) == html ? "text/plain" : "text/html"
    titles << RelatonBib::TypedTitleString.new(
      content: html, language: node[:"xml:lang"], script: "Latn", format: format,
    )
  end
end
#parse_type ⇒ Object
348 349 350 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 348
#
# Bibliographic item type.
#
# @return [String] always "article"
def parse_type
  "article"
end
#volume_issue_article ⇒ String
Join volume, issue and article identifiers
64 65 66 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 64
#
# Join the journal, volume and article values given to the constructor with
# single spaces, leaving out any that are nil.
#
# @return [String]
def volume_issue_article
  parts = [@journal, @volume, @article]
  parts.reject(&:nil?).join(" ")
end