Module: RelatonIetf::Scrapper

Defined in:
lib/relaton_ietf/scrapper.rb

Overview

Scrapper module: turns IETF references (RFC, Internet-Draft "I-D" and BCP) into bibliographic items.

Constant Summary collapse

RFC_URI_PATTERN =
"https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.CODE"
ID_URI_PATTERN =
"https://xml2rfc.tools.ietf.org/public/rfc/bibxml-ids/reference.CODE"
BCP_URI_PATTERN =
"https://www.rfc-editor.org/info/CODE"

Class Method Summary collapse

Class Method Details

.fetch_rfc(reference, is_relation = false) ⇒ RelatonIetf::IetfBibliographicItem

Parameters:

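  • reference (XML element node, not a String: the method reads reference[:anchor] and reference[:target] and calls reference.xpath)
  • is_relation (TrueClass, FalseClass) (defaults to: false)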

Returns:



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/relaton_ietf/scrapper.rb', line 42

# Assemble a bibliographic item from a parsed reference node.
#
# @param reference [#[], #xpath, nil] parsed reference node; it is read via
#   +reference[:anchor]+, +reference[:target]+ and
#   +reference.xpath("front/keyword")+ (presumably an XML element -- confirm
#   against the caller)
# @param is_relation [Boolean] forwarded unchanged to +ietf_item+
# @return [Object, nil] whatever +ietf_item+ returns, or nil when
#   +reference+ is nil or false
def fetch_rfc(reference, is_relation = false)
  return unless reference

  # Entries are listed in the order the item attributes are evaluated and
  # handed over to +ietf_item+.
  attrs = {
    is_relation: is_relation,
    id: reference[:anchor],
    docid: docids(reference),
    status: status(reference),
    language: [language(reference)],
    link: [{ type: "src", content: reference[:target] }],
    title: titles(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    date: dates(reference),
    series: series(reference),
    keyword: reference.xpath("front/keyword").map { |node| node.text },
  }

  ietf_item(**attrs)
end

.scrape_page(text, is_relation = false) ⇒ RelatonIetf::IetfBibliographicItem

Parameters:

  • text (String)
  • is_relation (TrueClass, FalseClass) (defaults to: false)

Returns:



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/relaton_ietf/scrapper.rb', line 23

# Resolve a textual IETF reference to a bibliographic item.
#
# An initial "IETF " prefix is dropped, then the reference is dispatched on
# the token it starts with: "RFC" and "I-D" are handed to +rfc_item+, "BCP"
# is handed to +bcp_item+.
#
# @param text [String] the reference, e.g. "IETF RFC 8341"
# @param is_relation [Boolean] forwarded to +rfc_item+ (not used for BCP)
# @return [Object] whatever +rfc_item+ / +bcp_item+ returns
# @raise [RelatonBib::RequestError] when the reference does not start with
#   RFC, I-D or BCP, or when one of the listed network/protocol errors is
#   raised while the item is being fetched
def scrape_page(text, is_relation = false)
  # Remove initial "IETF " string if specified. +\A+ (start of string) is
  # used rather than +^+ (start of any line) so that only a genuine leading
  # prefix is stripped, and +sub+ because at most one match can exist.
  ref = text.sub(/\AIETF /, "")

  # Each URI pattern constant is passed as a copy -- presumably because the
  # helpers substitute the CODE placeholder in place; confirm in rfc_item.
  case ref
  when /\ARFC/ then rfc_item(RFC_URI_PATTERN.dup, ref, is_relation)
  when /\AI-D/ then rfc_item(ID_URI_PATTERN.dup, ref, is_relation)
  when /\ABCP/ then bcp_item(BCP_URI_PATTERN.dup, ref)
  else
    raise RelatonBib::RequestError, "#{ref}: not recognised for RFC"
  end
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
       Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
       Net::ProtocolError, SocketError
  # Transport-level failures are reported to callers as a single error type.
  raise RelatonBib::RequestError, "No document found for #{ref} reference."
end