Module: Iecbib::Scrapper

Defined in:
lib/iecbib/scrapper.rb

Overview

Scrapes IEC webstore pages and builds bibliographic items from them. (The source disables RuboCop's Metrics/ModuleLength cop for this module.)

Constant Summary collapse

# Scheme and host of the IEC webstore site.
DOMAIN = 'https://webstore.iec.ch'
# Frozen lookup table from document-type abbreviations to document-type names.
# NOTE(review): 'appruvedWorkItem' looks like a misspelling of
# 'approvedWorkItem'; it is kept verbatim here because code outside this view
# may depend on the current value — confirm before changing.
TYPES = {
  'ISO' => 'international-standard',
  'TS' => 'technicalSpecification',
  'TR' => 'technicalReport',
  'PAS' => 'publiclyAvailableSpecification',
  'AWI' => 'appruvedWorkItem',
  'CD' => 'committeeDraft',
  'FDIS' => 'finalDraftInternationalStandard',
  'NP' => 'newProposal',
  'DIS' => 'draftInternationalStandard',
  'WD' => 'workingDraft',
  'R' => 'recommendation',
  'Guide' => 'guide'
}.freeze

Class Method Summary collapse

Class Method Details

.parse_page(hit_data) ⇒ IsoBibItem::IsoBibliographicItem

Parse a document page into a bibliographic item. (The source disables RuboCop's Metrics/AbcSize and Metrics/MethodLength cops for this method.)

Parameters:

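  • hit_data (Hash) — search hit; its :url and :code entries are read, and the whole hash is used to fetch the titles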

Returns:

  • (IsoBibItem::IsoBibliographicItem)


52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/iecbib/scrapper.rb', line 52

# Build a bibliographic item for one hit from its document page.
#
# @param hit_data [Hash] hit description; its +:url+ and +:code+ entries are
#   read here and the whole hash is handed to +fetch_titles+
# @return [IsoBibItem::IsoBibliographicItem]
def parse_page(hit_data)
  url = hit_data[:url]
  page = get_page(url)

  # Edition: text of the first <span> inside a <td> that follows the <th>
  # whose text contains "Edition".
  edition_text = page.at("//th[contains(., 'Edition')]/following-sibling::td/span").text

  # fetch_status_relations is given the same URL and yields the status and
  # the relations as a pair.
  doc_status, doc_relations = fetch_status_relations(url)

  code = hit_data[:code]
  attributes = {
    docid: fetch_docid(page),
    edition: edition_text,
    language: ['en'],
    script: ['Latn'],
    titles: fetch_titles(hit_data),
    type: fetch_type(page),
    docstatus: doc_status,
    ics: fetch_ics(page),
    dates: fetch_dates(page),
    contributors: fetch_contributors(code),
    workgroup: fetch_workgroup(page),
    abstract: fetch_abstract(page),
    copyright: fetch_copyright(code, page),
    link: fetch_link(page, url),
    relations: doc_relations
  }

  IsoBibItem::IsoBibliographicItem.new(**attributes)
end