Module: RelatonItu::DataParserR

Extended by:
DataParserR
Included in:
DataParserR
Defined in:
lib/relaton_itu/data_parser_r.rb

Instance Method Summary collapse

Instance Method Details

#fetch_abstract(doc) ⇒ Array<RelatonBib::FormattedString>

Parameters:

  • doc (Mechanize::Page)

Returns:

  • (Array<RelatonBib::FormattedString>)


43
44
45
46
47
48
# File 'lib/relaton_itu/data_parser_r.rb', line 43

# Collect the non-empty "Observation" cells of the page as abstracts.
#
# @param doc [Mechanize::Page] page to read the cells from
# @return [Array<RelatonBib::FormattedString>] one entry per non-blank cell,
#   each tagged as English / Latin script; empty when nothing is found
def fetch_abstract(doc)
  cells = doc.xpath('//h3[.="Observation"]/parent::td/following-sibling::td[2]')
  # Blank cells carry no abstract, so they are dropped before building objects.
  texts = cells.map { |cell| cell.text.strip }.reject(&:empty?)
  texts.map do |text|
    RelatonBib::FormattedString.new(content: text, language: "en", script: "Latn")
  end
end

#fetch_date(doc) ⇒ Array<RelatonBib::BibliographicDate>

Parameters:

  • doc (Mechanize::Page)

Returns:

  • (Array<RelatonBib::BibliographicDate>)


52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/relaton_itu/data_parser_r.rb', line 52

# Gather the dates that can be read from the page.
#
# Up to three dates are produced, in this order:
# * "confirmed" - from the first of the "Approval_Date", "Approval date" or
#   "Approval year" cells that is present;
# * "updated"   - from the "Version year" cell;
# * "published" - from a 19xx/20xx year that directly follows a hyphen in
#   the document-set heading.
#
# Every source is optional: a missing cell or heading simply contributes no
# date instead of raising.
#
# @param doc [Mechanize::Page] page to read the dates from
# @return [Array<RelatonBib::BibliographicDate>] possibly empty list of dates
def fetch_date(doc)
  dates = []

  approval = doc.at('//h3[.="Approval_Date"]/parent::td/following-sibling::td[2]',
                    '//h3[.="Approval date"]/parent::td/following-sibling::td[2]',
                    '//h3[.="Approval year"]/parent::td/following-sibling::td[2]')
  dates << parse_date(approval.text, "confirmed") if approval

  version = doc.at('//h3[.="Version year"]/parent::td/following-sibling::td[2]')
  dates << parse_date(version.text, "updated") if version

  # The heading may be absent; guard it like the cells above so that a page
  # without it does not abort the whole date extraction.
  heading = doc.at('//div[@id="idDocSetPropertiesWebPart"]/h2')
  year = heading && heading.text[/(?<=-)(19|20)\d{2}/]
  dates << parse_date(year, "published") if year

  dates
end

#fetch_docid(doc) ⇒ Array<RelatonBib::DocumentIdentifier>

Parameters:

  • doc (Mechanize::Page)

Returns:

  • (Array<RelatonBib::DocumentIdentifier>)


25
26
27
28
29
30
31
32
# File 'lib/relaton_itu/data_parser_r.rb', line 25

# Build the primary ITU-R identifier from the document-set heading.
#
# The heading is expected to look like "R-<series>-<number>[-<revision>]...".
# The number plus an optional 1-3 digit revision becomes the identifier,
# prefixed with "ITU-R ".
#
# @param doc [Mechanize::Page] page to read the heading from
# @return [Array<RelatonBib::DocumentIdentifier>] single primary identifier
# @raise [NoMethodError] when the heading is missing or does not match the
#   expected pattern
def fetch_docid(doc)
  heading = doc.at('//div[@id="idDocSetPropertiesWebPart"]/h2').text
  # `(?!\d)` stops the optional revision from swallowing the first digits of
  # a longer run (e.g. a trailing year), which would otherwise turn
  # "R-HDB-22-1995" into "22-199" instead of "22".
  number = heading.match(/^R-\w+-([^-]+(?:-\d{1,3}(?!\d))?)/)[1]
  [RelatonBib::DocumentIdentifier.new(type: "ITU", id: "ITU-R #{number}", primary: true)]
end

#fetch_doctype(type) ⇒ Object



94
95
96
# File 'lib/relaton_itu/data_parser_r.rb', line 94

# Wrap a document type name in a DocumentType object.
#
# @param type [String] document type
# @return [DocumentType] new instance built with the given type
def fetch_doctype(type)
  DocumentType.new type: type
end

#fetch_link(url) ⇒ Array<RelatonBib::TypedUri>

Parameters:

  • url (String)

Returns:

  • (Array<RelatonBib::TypedUri>)


81
82
83
# File 'lib/relaton_itu/data_parser_r.rb', line 81

# Wrap the document URL as a single "src" link.
#
# @param url [String] document url
# @return [Array<RelatonBib::TypedUri>] one-element list holding the link
def fetch_link(url)
  source = RelatonBib::TypedUri.new(type: "src", content: url)
  [source]
end

#fetch_status(doc) ⇒ RelatonBib::DocumentStatus?

Parameters:

  • doc (Mechanize::Page)

Returns:

  • (RelatonBib::DocumentStatus, nil)


87
88
89
90
91
92
# File 'lib/relaton_itu/data_parser_r.rb', line 87

# Read the document status from the "Status" cell of the page.
#
# @param doc [Mechanize::Page] page to read the status from
# @return [RelatonBib::DocumentStatus, nil] status whose stage is the cell
#   text, or nil when the page has no "Status" cell
def fetch_status(doc)
  cell = doc.at('//h3[.="Status"]/parent::td/following-sibling::td[2]')
  RelatonBib::DocumentStatus.new(stage: cell.text) if cell
end

#fetch_title(doc) ⇒ Array<RelatonBib::TypedTitleString>

Parameters:

  • doc (Mechanize::Page)

Returns:

  • (Array<RelatonBib::TypedTitleString>)


36
37
38
39
# File 'lib/relaton_itu/data_parser_r.rb', line 36

# Read the main title from the "Title" cell of the page.
#
# @param doc [Mechanize::Page] page to read the title from
# @return [Array<RelatonBib::TypedTitleString>] one-element list with the
#   "main" title, tagged as English / Latin script
def fetch_title(doc)
  cell = doc.at('//h3[.="Title"]/parent::td/following-sibling::td[2]')
  main_title = RelatonBib::TypedTitleString.new(
    type: "main",
    content: cell.text,
    language: "en",
    script: "Latn"
  )
  [main_title]
end

#parse(doc, url, type) ⇒ RelatonItu::ItuBibliographicItem

Parse ITU-R document.

Parameters:

  • doc (Mechanize::Page)

    mechanize page

  • url (String)

    document url

  • type (String)

    document type

Returns:

  • (RelatonItu::ItuBibliographicItem)



14
15
16
17
18
19
20
21
# File 'lib/relaton_itu/data_parser_r.rb', line 14

# Parse ITU-R document.
#
# Assembles a bibliographic item from the individual fetch_* helpers; the
# language, script and item type are fixed to "en", "Latn" and "standard".
#
# @param doc [Mechanize::Page] mechanize page
# @param url [String] document url
# @param type [String] document type
# @return [RelatonItu::ItuBibliographicItem]
def parse(doc, url, type)
  RelatonItu::ItuBibliographicItem.new(
    docid: fetch_docid(doc),
    title: fetch_title(doc),
    abstract: fetch_abstract(doc),
    date: fetch_date(doc),
    language: ["en"],
    link: fetch_link(url),
    script: ["Latn"],
    docstatus: fetch_status(doc),
    type: "standard",
    doctype: fetch_doctype(type)
  )
end

#parse_date(date, type) ⇒ RelatonBib::BibliographicDate

Parameters:

  • date (String)
  • type (String)

Returns:

  • (RelatonBib::BibliographicDate)


69
70
71
72
73
74
75
76
77
# File 'lib/relaton_itu/data_parser_r.rb', line 69

# Normalise a date string and wrap it in a bibliographic date.
#
# Recognised inputs:
# * six or more consecutive digits ("YYYYMM...") - becomes "YYYY-MM";
# * "a/b/YYYY" - becomes "YYYY-aa-bb", with both components zero-padded to
#   two digits so the result is a well-formed ISO 8601 date (presumably the
#   source is month/day/year - confirm against real pages);
# * anything else is passed through unchanged.
#
# @param date [String] raw date text
# @param type [String] date type, e.g. "confirmed", "updated", "published"
# @return [RelatonBib::BibliographicDate]
def parse_date(date, type)
  on = case date
       when /(\d{4})(\d{2})/
         found = Regexp.last_match
         "#{found[1]}-#{found[2]}"
       when %r{(\d{1,2})/(\d{1,2})/(\d{4})}
         found = Regexp.last_match
         # Pad single-digit components: "3/5/2019" must not become "2019-3-5".
         [found[3], found[1].rjust(2, "0"), found[2].rjust(2, "0")].join("-")
       else
         date
       end
  RelatonBib::BibliographicDate.new(type: type, on: on)
end