Module: RelatonIetf::Scrapper

Defined in:
lib/relaton_ietf/scrapper.rb

Overview

Scrapper module

Constant Summary collapse

RFC_URI_PATTERN =
"https://xml2rfc.tools.ietf.org/public/rfc/bibxml"
BCP_URI_PATTERN =
"https://www.rfc-editor.org/info/CODE"

Class Method Summary collapse

Class Method Details

.fetch_rfc(reference, is_relation = false, url = nil, ver = nil) ⇒ RelatonIetf::IetfBibliographicItem

Parameters:

  • reference (String)
  • is_relation (TrueClass, FalseClass) (defaults to: false)
  • url (String, NilClass) (defaults to: nil)
  • ver (String, NilClass) (defaults to: nil)

    Internet Draft version

Returns:

  • (RelatonIetf::IetfBibliographicItem, NilClass) — nil when reference is nil

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/relaton_ietf/scrapper.rb', line 50

# Assembles the attributes of a bibliographic item from +reference+ and hands
# them to +ietf_item+.
#
# @param reference [Object, nil] source of the item data. It is read via
#   +reference[:anchor]+ and +reference.xpath(...)+, so it is presumably a
#   parsed XML node rather than a plain String — TODO confirm against callers.
# @param is_relation [TrueClass, FalseClass] forwarded unchanged to +ietf_item+
# @param url [String, NilClass] forwarded to the +link+ helper
# @param ver [String, NilClass] Internet Draft version; forwarded to the
#   +docids+ and +link+ helpers
# @return [RelatonIetf::IetfBibliographicItem, nil] whatever +ietf_item+
#   returns, or nil when +reference+ is nil/false
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil)
  return nil unless reference

  # Keys are listed in the order the item attributes are evaluated.
  attrs = {
    is_relation: is_relation,
    id: reference[:anchor],
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    link: link(reference, url, ver),
    title: titles(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    date: dates(reference),
    series: series(reference),
    place: ["Fremont, CA"],
    keyword: reference.xpath("front/keyword").map { |node| node.text },
    doctype: doctype(reference[:anchor]),
  }

  ietf_item(**attrs)
end

.scrape_page(text, is_relation = false) ⇒ RelatonIetf::IetfBibliographicItem

Parameters:

  • text (String)
  • is_relation (TrueClass, FalseClass) (defaults to: false)

Returns:

  • (RelatonIetf::IetfBibliographicItem)

23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/relaton_ietf/scrapper.rb', line 23

# Resolves a textual document reference by dispatching on its prefix to
# +rfc_item+ or +bcp_item+.
#
# Every pattern is anchored with +\A+ (start of string), not +^+ (start of any
# line), so only the real beginning of the reference decides how it is
# handled; a multi-line string cannot be classified by a later line.
#
# @param text [String] the reference, optionally prefixed with "IETF "
# @param is_relation [TrueClass, FalseClass] forwarded to +rfc_item+
# @return [RelatonIetf::IetfBibliographicItem]
# @raise [RelatonBib::RequestError] when the prefix is not recognised, or when
#   one of the network/protocol errors listed in the rescue clause is raised
#   while fetching
def scrape_page(text, is_relation = false)
  # Remove only the initial "IETF " prefix, if present.
  ref = text.sub(/\AIETF /, "")

  # The string arrays are passed straight to rfc_item; presumably they are
  # suffixes combined with RFC_URI_PATTERN — TODO confirm in rfc_item.
  case ref
  when /\ARFC/ then rfc_item([""], ref, is_relation)
  when /\AI-D/ then rfc_item(["3"], ref, is_relation)
  when /\AW3C/ then rfc_item(["4", "2"], ref, is_relation)
  when /\A(ANSI|CCITT|FIPS|IANA|ISO|ITU|NIST|OASIS|PKCS)/
    rfc_item(["2"], ref, is_relation)
  when /\A(3GPP|SDO-3GPP)/ then rfc_item(["5"], ref, is_relation)
  when /\AIEEE/ then rfc_item(["6", "2"], ref, is_relation)
  # is_relation is not forwarded for BCP references; the constant is dup'ed
  # so bcp_item receives its own copy of the URI template.
  when /\ABCP/ then bcp_item(BCP_URI_PATTERN.dup, ref)
  else
    raise RelatonBib::RequestError, "#{ref}: not recognised for RFC"
  end
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
       Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
       Net::ProtocolError, SocketError
  # Normalise transport-level failures into the library's request error.
  raise RelatonBib::RequestError, "No document found for #{ref} reference."
end