Class: RelatonNist::Scrapper

Inherits:

Object

Object
RelatonNist::Scrapper

show all

Defined in: lib/relaton_nist/scrapper.rb

Constant Summary collapse

DOMAIN =

"https://csrc.nist.gov".freeze

Class Method Summary collapse

.id_cleanup(id) ⇒ Object

Strip status from doc id.
.parse_page(hit_data) ⇒ Hash

Parse page.

Class Method Details

.id_cleanup(id) ⇒ `Object`

Strip status from doc id.

Parameters:

id (String)

Returns:

String



51
52
53

# File 'lib/relaton_nist/scrapper.rb', line 51

# Remove status markers from a document identifier.
#
# Two substitutions are applied in order, each affecting only the first
# occurrence it finds:
# 1. the literal " (WITHDRAWN)" (case-sensitive);
# 2. a " (DRAFT)" marker, matched case-insensitively, which may carry a
#    single leading word inside the parentheses, e.g. " (2nd DRAFT)".
#
# @param id [String] document identifier
# @return [String] a new string with the markers removed
def id_cleanup(id)
  without_withdrawn = id.sub(/ \(WITHDRAWN\)/, "")
  without_withdrawn.sub(/ \(([^) ]+ )?DRAFT\)/i, "")
end

.parse_page(hit_data) ⇒ `Hash`

Parse page.

Parameters:

hit_data (Hash)

Returns:

(Hash)

# File 'lib/relaton_nist/scrapper.rb', line 13

# Build a bibliographic item from a search hit.
#
# Loads the page at hit_data[:url] and passes it to the fetch_* helpers to
# collect the item's attributes. When the first document identifier does not
# begin with "SP ", "NISTIR " or "FIPS ", that identifier (with status markers
# stripped by id_cleanup) is used as the doctype, and the identifier itself
# is replaced by one built from the first title's content.
#
# @param hit_data [Hash] search hit; the keys read here are :url,
#   :release_date, :code and :status (the whole hash is also handed to
#   fetch_titles)
# @return [NistBibliographicItem] the newly constructed item
#   NOTE(review): the published docs declare the return type as Hash — confirm.
def parse_page(hit_data)
  page = get_page(hit_data[:url])

  docid = fetch_docid(page)
  doctype = "standard"
  titles = fetch_titles(hit_data)

  # nil when the primary identifier is outside the SP/NISTIR/FIPS prefixes.
  known_prefix = /^(SP|NISTIR|FIPS) /.match(docid[0].id)
  if known_prefix.nil?
    doctype = id_cleanup(docid[0].id)
    docid[0] = RelatonBib::DocumentIdentifier.new(
      id: titles[0][:content],
      type: "NIST",
    )
  end

  NistBibliographicItem.new(
    fetched: Date.today.to_s,
    type: "standard",
    titles: titles,
    link: fetch_link(page),
    docid: docid,
    dates: fetch_dates(page, hit_data[:release_date]),
    contributors: fetch_contributors(page),
    edition: fetch_edition(hit_data[:code]),
    language: ["en"],
    script: ["Latn"],
    abstract: fetch_abstract(page),
    docstatus: fetch_status(page, hit_data[:status]),
    copyright: fetch_copyright(page),
    relations: fetch_relations(page),
    series: fetch_series(page),
    keyword: fetch_keywords(page),
    commentperiod: fetch_commentperiod(page),
    doctype: doctype,
  )
end

Class: RelatonNist::Scrapper

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.id_cleanup(id) ⇒ Object

.parse_page(hit_data) ⇒ Hash

.id_cleanup(id) ⇒ `Object`

.parse_page(hit_data) ⇒ `Hash`