Module: IETFBib::Scrapper

Defined in:
lib/ietfbib/scrapper.rb

Overview

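Scrapper module: downloads the BibXML reference file for an IETF document (an RFC or an Internet-Draft) and turns it into a bibliographic item.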

Constant Summary collapse

RFC_URI_PATTERN =
"https://www.rfc-editor.org/refs/bibxml/reference.CODE"
ID_URI_PATTERN =
"https://xml2rfc.tools.ietf.org/public/rfc/bibxml-ids/reference.CODE"

Class Method Summary collapse

Class Method Details

.scrape_page(text) ⇒ IsoBibItem::BibliographicItem

Parameters:

  • text (String)

Returns:

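  • (IsoBibItem::BibliographicItem, nil) — nil when the reference is not recognised as an RFC or I-D, when no document is found at the computed URI, or when the document contains no reference element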


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/ietfbib/scrapper.rb', line 19

# Downloads the BibXML reference file for an IETF document and builds a
# bibliographic item from it.
#
# @param text [String] document reference such as "RFC 8341" or
#   "IETF RFC 8341"; a leading "IETF " is dropped and the first space is
#   replaced by a dot to form the remote file name
# @return [IsoBibItem::BibliographicItem, nil] the result of +bib_item+, or
#   nil when the reference is neither an RFC nor an I-D, when the server does
#   not answer with status 200, or when the response holds no reference
#   element
def scrape_page(text)
  # Remove initial "IETF " string if specified, then turn e.g. "RFC 8341"
  # into the remote file name "RFC.8341.xml".
  ref = "#{text.sub(/\AIETF /, '').sub(' ', '.')}.xml"

  pattern = case ref
            when /\ARFC/ then RFC_URI_PATTERN
            when /\AI-D/ then ID_URI_PATTERN
            else
              warn "#{ref}: not recognised for RFC"
              return
            end

  # gsub returns a new string, so the pattern constant is never mutated.
  uri = pattern.gsub("CODE", ref)
  res = Net::HTTP.get_response(URI(uri))
  unless res.code == "200"
    warn "No document found at #{uri}"
    return
  end

  # Parse the body of the response already in hand instead of requesting the
  # same document a second time.
  doc = Nokogiri::HTML(res.body)
  @reference = doc.at('//reference')
  return unless @reference

  bib_item
end