Module: RelatonIec::Scrapper

Defined in:
lib/relaton_iec/scrapper.rb

Overview

Scrapper: fetches IEC webstore pages and builds bibliographic items from them.

Constant Summary collapse

# Base URL of the IEC webstore. Frozen, like the other constants here, so it
# cannot be mutated in place by accident.
DOMAIN = "https://webstore.iec.ch".freeze
# Publisher abbreviation mapped to a two-element array:
# the organization's full name followed by its website host.
ABBREVS = {
  "ISO" => [
    "International Organization for Standardization",
    "www.iso.org",
  ],
  "IEC" => [
    "International Electrotechnical Commission",
    "www.iec.ch",
  ],
  "CISPR" => [
    "International special committee on radio interference",
    "www.iec.ch",
  ],
}.freeze
# Document-type abbreviation mapped to the spelled-out document type name.
#
# NOTE(review): the "AWI" and "SRD" values were previously misspelled
# ("appruved-work-item", "system-reference-delivrabble"); confirm that no
# stored data or downstream consumer depends on the old spellings.
TYPES = {
  "ISO" => "international-standard",
  "TS" => "technical-specification",
  "TR" => "technical-report",
  "PAS" => "publicly-available-specification",
  "AWI" => "approved-work-item",
  "CD" => "committee-draft",
  "FDIS" => "final-draft-international-standard",
  "NP" => "new-proposal",
  "DIS" => "draft-international-standard",
  "WD" => "working-draft",
  "R" => "recommendation",
  "Guide" => "guide",
  "SRD" => "system-reference-deliverable",
}.freeze

Class Method Summary collapse

Class Method Details

.parse_page(hit_data) ⇒ IecBibliographicItem

Fetches the page at hit_data[:url] and parses it into an IecBibliographicItem.

Parameters:

  • hit_data (Hash)

Returns:

  • (IecBibliographicItem)


40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/relaton_iec/scrapper.rb', line 40

# Fetch the page for a search hit and build a bibliographic item from it.
#
# @param hit_data [Hash] search hit; +:url+ and +:code+ are read here, and
#   the whole hash is also handed to the docid and title fetchers
# @return [IecBibliographicItem] item assembled from the fetched page
def parse_page(hit_data)
  url = hit_data[:url]
  code = hit_data[:code]
  doc = get_page url

  # Fetch edition. The lookup can come back empty (assuming doc is a Nokogiri
  # document, `at` returns nil when nothing matches), so use safe navigation
  # and leave the edition nil instead of raising NoMethodError.
  edition = doc.at(
    "//th[contains(., 'Edition')]/following-sibling::td/span",
  )&.text

  status, relations = fetch_status_relations url

  IecBibliographicItem.new(
    fetched: Date.today.to_s,
    docid: fetch_docid(hit_data),
    structuredidentifier: fetch_structuredidentifier(doc),
    edition: edition,
    language: ["en"],
    script: ["Latn"],
    title: fetch_titles(hit_data),
    doctype: fetch_type(doc),
    docstatus: status,
    ics: fetch_ics(doc),
    date: fetch_dates(doc),
    contributor: fetch_contributors(code),
    editorialgroup: fetch_workgroup(doc),
    abstract: fetch_abstract(doc),
    copyright: fetch_copyright(code, doc),
    link: fetch_link(doc, url),
    relation: relations,
    place: ["Geneva"],
  )
end