Module: RelatonIetf::Scrapper
- Defined in:
- lib/relaton_ietf/scrapper.rb
Overview
Scrapper module
Constant Summary collapse
- RFC_URI_PATTERN =
"https://xml2rfc.tools.ietf.org/public/rfc/bibxml"
- BCP_URI_PATTERN =
ID_URI_PATTERN = “xml2rfc.tools.ietf.org/public/rfc/bibxml-ids/reference.CODE”
"https://www.rfc-editor.org/info/CODE"
Class Method Summary collapse
- .fetch_rfc(reference, is_relation = false, url = nil, ver = nil) ⇒ RelatonIetf::IetfBibliographicItem
- .scrape_page(text, is_relation = false) ⇒ RelatonIetf::IetfBibliographicItem
Class Method Details
.fetch_rfc(reference, is_relation = false, url = nil, ver = nil) ⇒ RelatonIetf::IetfBibliographicItem
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/relaton_ietf/scrapper.rb', line 50
#
# Assemble a bibliographic item from a parsed reference.
#
# Every attribute is produced by a helper of this module and handed to
# +ietf_item+ as keyword arguments.
#
# @param reference [#[], #xpath, nil] parsed reference; it is indexed with
#   +:anchor+ and queried with the XPath "front/keyword"
#   (presumably an XML element -- confirm against the caller)
# @param is_relation [Boolean] forwarded unchanged to +ietf_item+
# @param url [String, nil] forwarded to +link+
# @param ver [String, nil] forwarded to +docids+ and +link+
# @return [RelatonIetf::IetfBibliographicItem, nil] the value returned by
#   +ietf_item+, or nil when +reference+ is nil or false
def fetch_rfc(reference, is_relation = false, url = nil, ver = nil)
  return unless reference

  attributes = {
    is_relation: is_relation,
    id: reference[:anchor],
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    link: link(reference, url, ver),
    title: titles(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    date: dates(reference),
    series: series(reference),
    place: ["Fremont, CA"],
    keyword: reference.xpath("front/keyword").map(&:text),
    doctype: doctype(reference[:anchor]),
  }

  ietf_item(**attributes)
end
.scrape_page(text, is_relation = false) ⇒ RelatonIetf::IetfBibliographicItem
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/relaton_ietf/scrapper.rb', line 23
#
# Dispatch a document reference to the fetcher that matches its prefix.
#
# An initial "IETF " is removed, then the beginning of the remaining text
# selects the branch. All patterns are anchored with \A (start of string)
# rather than ^ (start of any line), so only the true beginning of the
# reference is examined.
#
# @param text [String] document reference, e.g. "IETF RFC 8341"
# @param is_relation [Boolean] forwarded unchanged to +rfc_item+
# @return [RelatonIetf::IetfBibliographicItem] whatever +rfc_item+ or
#   +bcp_item+ returns for the reference
# @raise [RelatonBib::RequestError] when the prefix is not recognised, or
#   when one of the listed network/protocol errors occurs while fetching
def scrape_page(text, is_relation = false)
  # Remove initial "IETF " string if specified
  ref = text.sub(/\AIETF /, "")

  # The string arrays passed to rfc_item are opaque here; presumably they
  # are suffixes selecting a bibxml directory -- confirm against rfc_item.
  case ref
  when /\ARFC/ then rfc_item [""], ref, is_relation
  when /\AI-D/ then rfc_item ["3"], ref, is_relation
  when /\AW3C/ then rfc_item ["4", "2"], ref, is_relation
  when /\A(ANSI|CCITT|FIPS|IANA|ISO|ITU|NIST|OASIS|PKCS)/
    rfc_item ["2"], ref, is_relation
  when /\A(3GPP|SDO-3GPP)/ then rfc_item ["5"], ref, is_relation
  when /\AIEEE/ then rfc_item ["6", "2"], ref, is_relation
  # A copy of the constant is handed over; presumably bcp_item modifies
  # the string it receives -- confirm against bcp_item.
  when /\ABCP/ then bcp_item BCP_URI_PATTERN.dup, ref
  else
    raise RelatonBib::RequestError, "#{ref}: not recognised for RFC"
  end
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
       Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
       Net::ProtocolError, SocketError
  # Translate low-level transport failures into the library's error type.
  raise RelatonBib::RequestError, "No document found for #{ref} reference."
end