Class: RelatonNist::Scrapper
- Inherits:
- Object
- Inheritance hierarchy:
- Object > RelatonNist::Scrapper
- Defined in:
- lib/relaton_nist/scrapper.rb
Constant Summary collapse
- DOMAIN = "https://csrc.nist.gov".freeze
Class Method Summary collapse
-
.id_cleanup(id) ⇒ Object
Strip status from doc id.
-
.parse_page(hit_data) ⇒ NistBibliographicItem
Parse page.
Class Method Details
.id_cleanup(id) ⇒ Object
Strip status from doc id
51 52 53 |
# File 'lib/relaton_nist/scrapper.rb', line 51

# Strip status from doc id.
#
# Two substitutions are applied in order, each affecting only the first match:
#   1. the literal " (WITHDRAWN)" marker (case-sensitive);
#   2. a draft marker, matched case-insensitively: " (DRAFT)" or
#      " (<word> DRAFT)", where <word> contains neither a space nor ")".
#
# @param id [String] document identifier (must respond to #sub)
# @return [String] the identifier with the status markers removed
def id_cleanup(id)
  without_withdrawn = id.sub(/ \(WITHDRAWN\)/, "")
  without_withdrawn.sub(/ \(([^) ]+ )?DRAFT\)/i, "")
end
.parse_page(hit_data) ⇒ NistBibliographicItem
Parse page.
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/relaton_nist/scrapper.rb', line 13

# Parse page.
#
# Loads the page at hit_data[:url] and assembles a bibliographic item from
# values produced by the fetch_* helpers.
#
# @param hit_data [Hash] search hit; this method reads the :url,
#   :release_date, :code and :status keys and passes the whole hash to
#   fetch_titles
# @return [NistBibliographicItem] the value of NistBibliographicItem.new
def parse_page(hit_data)
  page = get_page hit_data[:url]
  identifiers = fetch_docid(page)
  kind = "standard"
  title_list = fetch_titles(hit_data)

  # When the first identifier does not begin a line with "SP ", "NISTIR " or
  # "FIPS ", its status-stripped id becomes the doctype, and the identifier
  # itself is replaced by one built from the first title's content.
  series_match = /^(SP|NISTIR|FIPS) /.match identifiers[0].id
  if series_match.nil?
    kind = id_cleanup(identifiers[0].id)
    identifiers[0] = RelatonBib::DocumentIdentifier.new(
      id: title_list[0][:content], type: "NIST",
    )
  end

  # Entries are evaluated top to bottom, so the helpers run in this order.
  attributes = {
    fetched: Date.today.to_s,
    type: "standard", # always "standard"; only :doctype varies
    # id: fetch_id(doc),
    titles: title_list,
    link: fetch_link(page),
    docid: identifiers,
    dates: fetch_dates(page, hit_data[:release_date]),
    contributors: fetch_contributors(page),
    edition: fetch_edition(hit_data[:code]),
    language: ["en"],
    script: ["Latn"],
    abstract: fetch_abstract(page),
    docstatus: fetch_status(page, hit_data[:status]),
    copyright: fetch_copyright(page),
    relations: fetch_relations(page),
    series: fetch_series(page),
    keyword: fetch_keywords(page),
    commentperiod: fetch_commentperiod(page),
    doctype: kind,
  }
  NistBibliographicItem.new(**attributes)
end