Class: RelatonNist::Scrapper

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_nist/scrapper.rb

Constant Summary collapse

DOMAIN =
"https://csrc.nist.gov".freeze

Class Method Summary collapse

Class Method Details

.id_cleanup(id) ⇒ Object

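Strips a trailing status marker, " (WITHDRAWN)" or " (DRAFT)" (optionally preceded by one word, e.g. " (2nd Draft)"), from a document id.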

Parameters:

  • id

    String

Returns:

  • String



51
52
53
# File 'lib/relaton_nist/scrapper.rb', line 51

# Remove status markers from a document identifier.
#
# Two substitutions are applied in sequence, each replacing only the first
# match: a literal " (WITHDRAWN)" (case-sensitive), then a " (DRAFT)" marker
# (case-insensitive) that may carry a single space-free qualifier word before
# "DRAFT", e.g. " (2nd Draft)".
#
# @param id [String] document identifier, possibly with a status suffix
# @return [String] a new string with the markers removed
def id_cleanup(id)
  withdrawn_marker = / \(WITHDRAWN\)/
  draft_marker = / \(([^) ]+ )?DRAFT\)/i

  not_withdrawn = id.sub(withdrawn_marker, "")
  not_withdrawn.sub(draft_marker, "")
end

.parse_page(hit_data) ⇒ NistBibliographicItem

Fetches the page at hit_data[:url] and builds a NistBibliographicItem from it.

Parameters:

  • hit_data (Hash)

Returns:

  • (NistBibliographicItem)


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/relaton_nist/scrapper.rb', line 13

# Fetch the page referenced by a search hit and build a bibliographic item
# from it.
#
# The document type is "standard" when the first document identifier matches
# /^(SP|NISTIR|FIPS) /. Otherwise the cleaned-up identifier text is used as
# the document type, and the first identifier is replaced by a "NIST"
# identifier whose id is the content of the first title.
#
# @param hit_data [Hash] search hit; the :url, :release_date, :code and
#   :status keys are read here and the whole hash is handed to fetch_titles
# @return [NistBibliographicItem]
def parse_page(hit_data)
  page = get_page(hit_data[:url])
  identifiers = fetch_docid(page)
  title_list = fetch_titles(hit_data)

  series_prefix = /^(SP|NISTIR|FIPS) /
  kind =
    if series_prefix.match?(identifiers[0].id)
      "standard"
    else
      # Not a recognised series: the identifier text becomes the document
      # type and the first title takes over as the identifier.
      cleaned = id_cleanup(identifiers[0].id)
      identifiers[0] = RelatonBib::DocumentIdentifier.new(
        id: title_list[0][:content], type: "NIST",
      )
      cleaned
    end

  # Entries are listed in the order the helpers must be invoked.
  attributes = {
    fetched: Date.today.to_s,
    type: "standard",
    # id: fetch_id(page),
    titles: title_list,
    link: fetch_link(page),
    docid: identifiers,
    dates: fetch_dates(page, hit_data[:release_date]),
    contributors: fetch_contributors(page),
    edition: fetch_edition(hit_data[:code]),
    language: ["en"],
    script: ["Latn"],
    abstract: fetch_abstract(page),
    docstatus: fetch_status(page, hit_data[:status]),
    copyright: fetch_copyright(page),
    relations: fetch_relations(page),
    series: fetch_series(page),
    keyword: fetch_keywords(page),
    commentperiod: fetch_commentperiod(page),
    doctype: kind,
  }

  NistBibliographicItem.new(**attributes)
end