Module: RelatonBib::BibXMLParser

Extended by:
BibXMLParser
Included in:
BibXMLParser
Defined in:
lib/relaton_bib/bibxml_parser.rb

Constant Summary collapse

SERIESINFONAMES =

Names of SeriesInfo elements whose values should be saved as document identifiers in the Relaton model.

["DOI"].freeze
# Identifier prefixes that #create_docid and #docids handle as RFC-series IDs
# (leading zeros of the number part are stripped for these prefixes).
RFCPREFIXES =
%w[RFC BCP FYI STD].freeze
# NOTE(review): not referenced by any method shown in this module; presumably
# a hook for flavor-specific parsers that extend it — confirm.
FLAVOR =
nil
# Full organization names keyed by the short form that may appear as the text
# of an author's <organization> element; #organization uses this table to
# expand the short form before building the Organization entity.
ORGNAMES = {
  "IEEE" => "Institute of Electrical and Electronics Engineers",
  "W3C" => "World Wide Web Consortium",
  "3GPP" => "3rd Generation Partnership Project",
}.freeze

Instance Method Summary collapse

Instance Method Details

#abstracts(ref) ⇒ Array<RelatonBib::FormattedString>



195
196
197
198
199
200
201
202
# File 'lib/relaton_bib/bibxml_parser.rb', line 195

# Turn every <abstract> found under <front> into an HTML formatted string.
#
# @param ref [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::FormattedString>] one entry per abstract
def abstracts(ref)
  ref.xpath("./front/abstract").map do |abstract|
    # <t>/</t> tags (with surrounding whitespace) become <p>/</p>
    html = abstract.children.to_s
    html = html.gsub(/\s*(<\/?)t(>)\s*/, '\1p\2')
    # tabs and newlines become spaces, then runs of spaces collapse to one
    html = html.gsub(/[\t\n]/, " ").squeeze(" ")
    FormattedString.new(
      content: html, language: language(ref), script: "Latn",
      format: "text/html"
    )
  end
end

#add_contact(conts, type, value) ⇒ Object



318
319
320
# File 'lib/relaton_bib/bibxml_parser.rb', line 318

# Append a Contact built from +value+ to +conts+ when a node was found.
#
# @param conts [Array] contact list, mutated in place
# @param type [String] contact type ("phone", "email", "uri")
# @param value [#text, nil] node whose text becomes the contact value
# @return [Array, nil] +conts+ when a contact was added, nil otherwise
def add_contact(conts, type, value)
  return unless value

  conts << Contact.new(type: type, value: value.text)
end

#address(postal) ⇒ Object



302
303
304
305
306
307
308
309
310
311
312
313
# File 'lib/relaton_bib/bibxml_parser.rb', line 302

# Build an Address from a <postal> element.
#
# The street is taken from the first <postalLine>, falling back to <street>;
# the remaining fields are nil when their element is missing.
#
# @param postal [Nokogiri::XML::Element] <postal> element
# @return [RelatonBib::Address]
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
  text_at = ->(path) { postal.at(path)&.text }
  street_line = text_at.call("./postalLine") || text_at.call("./street")
  Address.new(
    street: [street_line].compact,
    city: text_at.call("./city"),
    postcode: text_at.call("./code"),
    country: text_at.call("./country"),
    state: text_at.call("./region"),
  )
end

#affiliation(author) ⇒ Array<RelatonBib::Affiliation>



263
264
265
266
267
268
269
# File 'lib/relaton_bib/bibxml_parser.rb', line 263

# Build the author's affiliation from its <organization> child.
#
# @param author [Nokogiri::XML::Element] <author> element
# @return [Array<RelatonBib::Affiliation>] empty when the element is missing
#   or has empty text
def affiliation(author)
  org_node = author.at("./organization")
  return [] unless org_node

  org_name = org_node.text
  return [] if org_name.empty?

  [Affiliation.new(organization: new_org(org_name, org_node[:abbrev]))]
end

#bib_item(**attrs) ⇒ RelatonBib::BibliographicItem



58
59
60
61
# File 'lib/relaton_bib/bibxml_parser.rb', line 58

# Instantiate the bibliographic item from the collected attributes.
# The keyword arguments are forwarded unchanged.
#
# @param attrs [Hash] attributes gathered by #fetch_rfc
# @return [RelatonBib::BibliographicItem]
def bib_item(**attrs)
  # e.g. attrs[:place] = ["Fremont, CA"] could be injected here
  item_attrs = attrs
  BibliographicItem.new(**item_attrs)
end

#committee(wgr) ⇒ RelatonBib::TechnicalCommittee



365
366
367
# File 'lib/relaton_bib/bibxml_parser.rb', line 365

# Wrap a work group in a technical committee.
#
# @param wgr [RelatonBib::WorkGroup]
# @return [RelatonBib::TechnicalCommittee]
def committee(wgr)
  TechnicalCommittee.new(wgr)
end

#contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Contact>



288
289
290
291
292
293
294
295
296
297
298
# File 'lib/relaton_bib/bibxml_parser.rb', line 288

# Collect the postal address and phone/email/uri contacts of an <address>.
#
# @param addr [Nokogiri::XML::Element, nil] <address> element
# @return [Array] address first (when <postal> exists), then the contacts
#   in phone, email, uri order; empty when +addr+ is nil
def contacts(addr)
  return [] unless addr

  found = []
  postal_node = addr.at("./postal")
  found << address(postal_node) if postal_node
  %w[phone email uri].each do |kind|
    add_contact(found, kind, addr.at("./#{kind}"))
  end
  found
end

#contributor_role(author) ⇒ Hash



324
325
326
# File 'lib/relaton_bib/bibxml_parser.rb', line 324

# Role description for a contributor.
#
# @param author [Nokogiri::XML::Element] <author> element
# @return [Hash] +{ type: ... }+ using the +role+ attribute, "author" if absent
def contributor_role(author)
  role = author[:role] || "author"
  { type: role }
end

#contributors(reference) ⇒ Array<Hash>



206
207
208
209
210
211
212
213
# File 'lib/relaton_bib/bibxml_parser.rb', line 206

# Build contributor entries from every <author> under <front>.
#
# Authors carrying a +fullname+ or +surname+ attribute are parsed as persons,
# all others as organizations; nil results are dropped.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>]
def contributors(reference)
  authors = reference.xpath("./front/author")
  entries = authors.map do |author|
    is_person = author[:fullname] || author[:surname]
    is_person ? person(author, reference) : organization(author)
  end
  entries.compact
end

#create_docid(id, ver) ⇒ Object

rubocop:disable Metrics/MethodLength



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/relaton_bib/bibxml_parser.rb', line 112

# Build the primary document identifier from a raw ID string.
#
# * RFC-series prefixes (see RFCPREFIXES): "<prefix> <number>" with leading
#   zeros (and an optional leading dash) removed from the number.
# * "I-D" / "draft" prefixes: "draft-<rest>", with a trailing two-digit
#   version replaced by +ver+ when given; the type is "Internet-Draft".
# * Anything else: "<prefix> <number>", or the ID unchanged when no prefix
#   could be recognised.
#
# @param id [String] raw identifier (anchor, docName or number)
# @param ver [String, nil] draft version to substitute
# @return [RelatonBib::DocumentIdentifier]
def create_docid(id, ver) # rubocop:disable Metrics/MethodLength
  prefix, number = id_to_pref_num(id)
  case prefix
  when *RFCPREFIXES
    type = pubid_type id
    pid = "#{prefix} #{number.sub(/^-?0+/, '')}"
  when "I-D", "draft"
    type = "Internet-Draft"
    pid = "draft-#{number}"
    pid = pid.sub(/(?<=-)\d{2}$/, ver) if ver
  else
    type = pubid_type id
    pid = prefix ? "#{prefix} #{number}" : id
  end
  DocumentIdentifier.new(type: type, id: pid, primary: true)
end

#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>

Extract date from reference.



342
343
344
345
346
347
348
349
350
351
# File 'lib/relaton_bib/bibxml_parser.rb', line 342

# Extract the publication date from <front>/<date>.
#
# The result is built from the +year+, +month+ and +day+ attributes as
# "YYYY", "YYYY-MM" or "YYYY-MM-DD". Empty attributes are ignored, and the day
# is only used when a month is present, because "YYYY-D" would be read as a
# year and a month. The day is left-padded to two digits.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<RelatonBib::BibliographicDate>] empty when there is no
#   <date> element or its year is missing/empty
def dates(reference) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize
  date = reference.at "./front/date"
  return [] if date.nil? || date[:year].nil? || date[:year].empty?

  parts = [date[:year]]
  mon = date[:month]
  unless mon.nil? || mon.empty?
    parts << month(mon)
    day = date[:day]
    parts << day.rjust(2, "0") unless day.nil? || day.empty?
  end
  [BibliographicDate.new(type: "published", on: parts.join("-"))]
end

#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>

Extract document identifiers from reference



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/relaton_bib/bibxml_parser.rb', line 77

# Extract document identifiers from a reference.
#
# The result contains, in order:
# 1. the primary identifier: the Internet-Draft seriesInfo value (with its
#    trailing two-digit version replaced by +ver+ when given), or otherwise an
#    identifier built from the first of anchor / docName / number;
# 2. one scoped identifier for each of the anchor, docName and number
#    attributes that is present;
# 3. one identifier for every seriesInfo whose name is in SERIESINFONAMES.
#
# @param reference [Nokogiri::XML::Element]
# @param ver [String, nil] Internet-Draft version
# @return [Array<RelatonBib::DocumentIdentifier>]
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
  ids = []
  draft_info = reference.at("./seriesInfo[@name='Internet-Draft']",
                            "./front/seriesInfo[@name='Internet-Draft']")
  if draft_info
    id = draft_info[:value]
    id.sub!(/(?<=-)\d{2}$/, ver) if ver
    ids << DocumentIdentifier.new(type: "Internet-Draft", id: id, primary: true)
  else
    id = reference[:anchor] || reference[:docName] || reference[:number]
    ids << create_docid(id, ver) if id
  end

  %w[anchor docName number].each do |attr_name|
    raw = reference[attr_name]
    next unless raw

    prefix, number = id_to_pref_num raw
    scoped_id = if attr_name == "anchor" && RFCPREFIXES.include?(prefix)
                  "#{prefix}#{number.sub(/^-?0+/, '')}"
                else
                  raw
                end
    # NOTE(review): the type is derived from the primary id, not from the
    # attribute value itself — confirm this is intended.
    ids << DocumentIdentifier.new(id: scoped_id, type: pubid_type(id), scope: attr_name)
  end

  series_infos = reference.xpath("./seriesInfo", "./front/seriesInfo")
  wanted = series_infos.select { |info| SERIESINFONAMES.include? info[:name] }
  ids + wanted.map { |info| DocumentIdentifier.new(id: info[:value], type: info[:name]) }
end

#docnumber(reference) ⇒ Object



52
53
54
# File 'lib/relaton_bib/bibxml_parser.rb', line 52

# Document number: the anchor without a leading "<word>." prefix.
#
# @param reference [Nokogiri::XML::Element]
# @return [String, nil] nil when the reference has no anchor
def docnumber(reference)
  anchor = reference[:anchor]
  return if anchor.nil?

  anchor.sub(/^\w+\./, "")
end

#doctype(anchor) ⇒ String



395
396
397
398
399
400
401
# File 'lib/relaton_bib/bibxml_parser.rb', line 395

# Derive the document type from the anchor.
#
# @param anchor [String, nil]
# @return [String] "internet-draft" when the anchor contains "I-D", "ieee"
#   when it contains "IEEE", "rfc" otherwise
def doctype(anchor)
  rules = [[/I-D/, "internet-draft"], [/IEEE/, "ieee"]]
  # first matching rule wins, mirroring a case/when on the anchor
  _pattern, type = rules.find { |pattern, _| pattern === anchor }
  type || "rfc"
end

#editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?



355
356
357
358
359
360
361
# File 'lib/relaton_bib/bibxml_parser.rb', line 355

# Build the editorial group from the <workgroup> elements under <front>.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::EditorialGroup, nil] nil when no committee was built
def editorialgroup(reference)
  workgroups = reference.xpath("./front/workgroup")
  committees = workgroups.map { |node| committee(WorkGroup.new(name: node.text)) }
  return unless committees.any?

  EditorialGroup.new(committees)
end

#fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/relaton_bib/bibxml_parser.rb', line 25

# Convert a <reference> / <referencegroup> element into a bibliographic item.
#
# @param reference [Nokogiri::XML::Element, nil]
# @param is_relation [Boolean] passed through to the item attributes
# @param url [String, nil] source URL of the BibXML, used for the "xml" link
# @param ver [String, nil] Internet-Draft version
# @return [RelatonBib::BibliographicItem, nil] nil when +reference+ is missing
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  return unless reference

  keywords = reference.xpath("front/keyword").map { |kw| kw.text }
  bib_item(
    is_relation: is_relation,
    docnumber: docnumber(reference),
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    script: ["Latn"],
    link: link(reference, url, ver),
    title: titles(reference),
    formattedref: formattedref(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    relation: relations(reference),
    date: dates(reference),
    editorialgroup: editorialgroup(reference),
    series: series(reference),
    keyword: keywords,
    doctype: doctype(reference[:anchor]),
  )
end

#formattedref(reference) ⇒ RelatonBib::FormattedRef?



182
183
184
185
186
187
188
189
190
191
# File 'lib/relaton_bib/bibxml_parser.rb', line 182

# Fallback formatted reference for references without a title.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::FormattedRef, nil] nil when a <front>/<title> exists or
#   none of anchor / docName / number is set
def formattedref(reference)
  return if reference.at "./front/title"

  label = reference[:anchor] || reference[:docName] || reference[:number]
  return unless label

  FormattedRef.new(content: label, language: language(reference), script: "Latn")
end

#full_name(author, reference) ⇒ RelatonBib::FullName



252
253
254
255
256
257
258
259
# File 'lib/relaton_bib/bibxml_parser.rb', line 252

# Build a person's full name from the author's attributes.
#
# @param author [Nokogiri::XML::Element] <author> element
# @param reference [Nokogiri::XML::Element] used to determine the language
# @return [RelatonBib::FullName]
def full_name(author, reference)
  lang = language reference
  localized = ->(attr) { localized_string(author[attr], lang) }
  FullName.new(
    completename: localized.call(:fullname),
    initial: [localized.call(:initials)].compact,
    surname: localized.call(:surname),
  )
end

#id_to_pref_num(id) ⇒ Object



128
129
130
131
# File 'lib/relaton_bib/bibxml_parser.rb', line 128

# Split an identifier into its prefix and the remainder.
#
# The prefix is "I-D", "draft", "3GPP", "W3C" or a run of two or more capital
# letters; one optional ".", "_" or "-" separator is dropped.
#
# @param id [String, nil]
# @return [Array(String, String), nil] +[prefix, rest]+, nil when no prefix
#   is recognised
def id_to_pref_num(id)
  matched = /^(?<pref>I-D|draft|3GPP|W3C|[A-Z]{2,})[._-]?(?<num>.+)/.match id
  return unless matched

  [matched[:pref], matched[:num]]
end

#language(reference) ⇒ String



65
66
67
# File 'lib/relaton_bib/bibxml_parser.rb', line 65

# Language of the reference.
#
# @param reference [Nokogiri::XML::Element]
# @return [String] the +lang+ attribute, "en" when it is absent
def language(reference)
  lang = reference[:lang]
  lang || "en"
end


159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/relaton_bib/bibxml_parser.rb', line 159

# Collect the links of a reference.
#
# @param reference [Nokogiri::XML::Element]
# @param url [String, nil] source URL, emitted as the "xml" link
# @param ver [String, nil] version substituted into Internet-Draft format
#   targets (the two digits between "-" and ".")
# @return [Array<Hash>] hashes with +:type+ and +:content+; <format> children
#   are only included when the anchor starts with "I-D"
def link(reference, url, ver)
  links = []
  links << { type: "xml", content: url } if url
  src = reference[:target]
  links << { type: "src", content: src } if src
  return links unless /^I-D/.match? reference[:anchor]

  formats = reference.xpath("format").map do |fmt|
    href = fmt[:target]
    href = href.sub(/(?<=-)\d{2}(?=\.)/, ver) if ver
    { type: fmt[:type], content: href }
  end
  links.concat(formats)
end

#localized_string(content, lang) ⇒ RelatonBib::LocalizedString?



282
283
284
# File 'lib/relaton_bib/bibxml_parser.rb', line 282

# Wrap +content+ in a localized string.
#
# @param content [String, nil]
# @param lang [String]
# @return [RelatonBib::LocalizedString, nil] nil when +content+ is missing
def localized_string(content, lang)
  return unless content

  LocalizedString.new(content, lang)
end

#month(mon) ⇒ Object



369
370
371
372
373
374
# File 'lib/relaton_bib/bibxml_parser.rb', line 369

# Normalise a month given as a number or an English month name.
#
# Numeric input is left-padded to two digits so it matches the format produced
# for month names. Names are matched case-insensitively against
# Date::MONTHNAMES and may be abbreviated ("Jan", "sept"). Surrounding
# whitespace is ignored.
#
# @param mon [String, nil] month number or (partial) English month name
# @return [String] two-digit month; "00" when the name is not recognised
def month(mon)
  value = mon.to_s.strip
  return value.rjust(2, "0") if /\A\d+\z/.match? value

  needle = value.downcase
  # MONTHNAMES[0] is nil, hence the safe navigation
  idx = Date::MONTHNAMES.index { |name| name&.downcase&.include? needle }
  idx.to_s.rjust 2, "0"
end

#new_org(name, abbr) ⇒ RelatonBib::Organization



274
275
276
277
# File 'lib/relaton_bib/bibxml_parser.rb', line 274

# Instantiate an organization.
#
# @param name [String] organization name
# @param abbr [String, nil] abbreviation
# @return [RelatonBib::Organization]
def new_org(name, abbr)
  # e.g. name "Internet Engineering Task Force", abbr "IETF"
  Organization.new(name: name, abbreviation: abbr)
end

#organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/relaton_bib/bibxml_parser.rb', line 233

# Build an organization contributor from an <author> element.
#
# Returns nil when the author has no <organization> child or its text is
# blank, so the caller can drop the entry instead of failing on a missing
# node. Known short names (see ORGNAMES) are expanded to the full name.
#
# @param contrib [Nokogiri::XML::Element] <author> element
# @return [Hash{Symbol=>RelatonBib::Organization,Array<Hash>}, nil]
def organization(contrib)
  org = contrib.at("./organization")
  return if org.nil?

  org_name = org.text.strip
  return if org_name.empty?

  name = ORGNAMES[org_name] || org_name
  { entity: new_org(name, org[:abbrev]), role: [contributor_role(contrib)] }
end

#parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object



15
16
17
18
# File 'lib/relaton_bib/bibxml_parser.rb', line 15

# Parse a BibXML string into a bibliographic item.
#
# @param bibxml [String] BibXML document
# @param url [String, nil] source URL of the document
# @param is_relation [Boolean]
# @param ver [String, nil] Internet-Draft version
# @return [RelatonBib::BibliographicItem, nil] nil when the document has
#   neither a <referencegroup> nor a <reference> root
def parse(bibxml, url: nil, is_relation: false, ver: nil)
  root = Nokogiri::XML(bibxml).at("/referencegroup", "/reference")
  fetch_rfc(root, url: url, is_relation: is_relation, ver: ver)
end

#person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>



218
219
220
221
222
223
224
225
226
227
228
# File 'lib/relaton_bib/bibxml_parser.rb', line 218

# Build a person contributor from an <author> element.
#
# @param author [Nokogiri::XML::Element] <author> element
# @param reference [Nokogiri::XML::Element] enclosing reference
# @return [Hash{Symbol=>RelatonBib::Person,Array<Hash>}]
def person(author, reference)
  name = full_name(author, reference)
  affiliations = affiliation(author)
  contact_list = contacts(author.at("./address"))
  contributor = Person.new(
    name: name, affiliation: affiliations, contact: contact_list,
  )
  { entity: contributor, role: [contributor_role(author)] }
end

#pubid_type(id) ⇒ String

Extract document identifier type from identifier



140
141
142
# File 'lib/relaton_bib/bibxml_parser.rb', line 140

# Document identifier type: the prefix recognised in +id+.
#
# @param id [String]
# @return [String, nil] nil when no prefix is recognised
def pubid_type(id)
  prefix, = id_to_pref_num(id)
  prefix
end

#relations(reference) ⇒ Array<Hash>



330
331
332
333
334
# File 'lib/relaton_bib/bibxml_parser.rb', line 330

# Build an "includes" relation for every direct <reference> child.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with +:type+ and +:bibitem+
def relations(reference)
  reference.xpath("reference").map do |child|
    included = fetch_rfc(child, is_relation: true)
    { type: "includes", bibitem: included }
  end
end

#series(reference) ⇒ Array<RelatonBib::Series>

Extract series from reference.



382
383
384
385
386
387
388
389
390
391
# File 'lib/relaton_bib/bibxml_parser.rb', line 382

# Extract series from the seriesInfo elements of a reference.
#
# Entries whose name is in SERIESINFONAMES, or that carry a +stream+ or
# +status+ attribute, are skipped.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<RelatonBib::Series>]
def series(reference)
  infos = reference.xpath("./seriesInfo", "./front/seriesInfo")
  infos.each_with_object([]) do |info, collected|
    next if SERIESINFONAMES.include?(info[:name]) || info[:stream] || info[:status]

    title = TypedTitleString.new(
      content: info[:name], language: language(reference), script: "Latn",
    )
    collected << Series.new(title: title, number: info[:value], type: "main")
  end
end

#status(reference) ⇒ RelatonBib::DocumentStatus

Extract the document status from the reference.



150
151
152
153
# File 'lib/relaton_bib/bibxml_parser.rb', line 150

# Extract the document status from a seriesInfo element that carries a
# +status+ attribute.
#
# The element is looked up as +seriesInfo+ directly under the reference and
# under <front>, the same locations the other seriesInfo queries in this
# module use. XPath is case-sensitive, so the all-lowercase +seriesinfo+
# spelling is kept as a last fallback for inputs that relied on it.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::DocumentStatus, nil] nil when no such element exists
def status(reference)
  st = reference.at("./seriesInfo[@status]", "./front/seriesInfo[@status]",
                    "./seriesinfo[@status]")
  DocumentStatus.new(stage: st[:status]) if st
end

#titles(reference) ⇒ Array<Hash>



174
175
176
177
178
# File 'lib/relaton_bib/bibxml_parser.rb', line 174

# Collect the titles found under <front>.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with +:content+, +:language+ and +:script+
def titles(reference)
  title_nodes = reference.xpath("./front/title")
  title_nodes.map do |node|
    { content: node.text, language: language(reference), script: "Latn" }
  end
end