Class: RelatonIso::Scrapper
- Inherits:
-
Object
- Object
- RelatonIso::Scrapper
- Defined in:
- lib/relaton_iso/scrapper.rb
Overview
Scrapper.
Constant Summary collapse
- DOMAIN =
rubocop:disable Metrics/ModuleLength
"https://www.iso.org"
- TYPES =
{ "TS" => "technical-specification", "DTS" => "technical-specification", "TR" => "technical-report", "DTR" => "technical-report", "PAS" => "publicly-available-specification", # "AWI" => "approvedWorkItem", # "CD" => "committeeDraft", # "FDIS" => "finalDraftInternationalStandard", # "NP" => "newProposal", # "DIS" => "draftInternationalStandard", # "WD" => "workingDraft", # "R" => "recommendation", "Guide" => "guide", "ISO" => "international-standard", "IEC" => "international-standard", "IWA" => "international-workshop-agreement", }.freeze
- STGABBR =
{ "00" => "NWIP", "10" => "AWI", "20" => "WD", "30" => "CD", "40" => "DIS", "50" => "FDIS", "60" => { "00" => "PRF", "60" => "FINAL" }, }.freeze
- PUBLISHERS =
{ "IEC" => { name: "International Electrotechnical Commission", url: "www.iec.ch" }, "ISO" => { name: "International Organization for Standardization", url: "www.iso.org" }, "IEEE" => { name: "Institute of Electrical and Electronics Engineers", url: "www.ieee.org" }, "SAE" => { name: "SAE International", url: "www.sae.org" }, "CIE" => { name: " International Commission on Illumination", url: "cie.co.at" }, "ASME" => { name: "American Society of Mechanical Engineers", url: "www.asme.org" }, }.freeze
Class Method Summary collapse
Instance Method Summary collapse
- #edition ⇒ Object
-
#fetch_relaton_docids ⇒ Array<RelatonBib::DocumentIdentifier>
Create document ids.
- #id ⇒ Object
-
#initialize(lang, errors) ⇒ Scrapper
constructor
extend self.
-
#isoref ⇒ String
Create ISO reference identifier with English language.
-
#parse(path) ⇒ Object
rubocop:disable Metrics/AbcSize,Metrics/MethodLength.
- #pubid ⇒ Object
Constructor Details
#initialize(lang, errors) ⇒ Scrapper
extend self
53 54 55 56 |
# File 'lib/relaton_iso/scrapper.rb', line 53
#
# Constructor. Keeps both arguments on the instance for later use.
#
# @param lang [Object] stored in @lang
# @param errors [Object] stored in @errors; #edition and #id update its
#   entries with `&&=`
def initialize(lang, errors)
  @lang, @errors = lang, errors
end
Class Method Details
.parse_page(path, lang: nil, errors: {}) ⇒ RelatonIsoBib::IsoBibliographicItem
Parse page.
63 64 65 |
# File 'lib/relaton_iso/scrapper.rb', line 63
#
# Parse page: builds a scrapper from +lang+ and +errors+ and hands +path+
# to its #parse.
#
# @param path [Object] passed unchanged to #parse
# @param lang [Object, nil] forwarded to the constructor
# @param errors [Hash] forwarded to the constructor
# @return [RelatonIsoBib::IsoBibliographicItem]
def self.parse_page(path, lang: nil, errors: {})
  scrapper = new(lang, errors)
  scrapper.parse(path)
end
Instance Method Details
#edition ⇒ Object
111 112 113 114 115 116 117 |
# File 'lib/relaton_iso/scrapper.rb', line 111
#
# Edition taken from the last text node of the <div> that contains an
# "Edition" label <div>. The result (including nil) is memoized.
#
# @errors[:edition] is only rewritten when it is already truthy; it then
# becomes true if the node is missing and false otherwise.
#
# @return [String, nil] trailing digits of the node text ("" when the text
#   does not end in digits), or nil when the node is not found
def edition
  return @edition if defined?(@edition)

  node = @doc.at("//div[div[.='Edition']]/text()[last()]")
  @errors[:edition] &&= node.nil?
  # String#[] with a regexp yields the matched text or nil; to_s maps nil to "".
  @edition = node && node.text[/\d+$/].to_s
end
#fetch_relaton_docids ⇒ Array<RelatonBib::DocumentIdentifier>
Create document ids.
124 125 126 127 128 129 130 131 |
# File 'lib/relaton_iso/scrapper.rb', line 124
#
# Create document ids.
#
# Fills in the stage of #pubid from #stage_code when it has none, then
# builds three identifiers: the primary "ISO" id, the "iso-reference" id
# (from #isoref) and the "URN" id.
#
# @return [Array<RelatonBib::DocumentIdentifier>]
def fetch_relaton_docids
  pubid.stage ||= Pubid::Iso::Identifier.parse_stage(stage_code)

  primary_id = DocumentIdentifier.new(id: pubid, type: "ISO", primary: true)
  reference_id = RelatonBib::DocumentIdentifier.new(id: isoref, type: "iso-reference")
  urn_id = DocumentIdentifier.new(id: pubid, type: "URN")
  [primary_id, reference_id, urn_id]
end
#id ⇒ Object
93 94 95 96 97 98 99 |
# File 'lib/relaton_iso/scrapper.rb', line 93
#
# Identifier text taken from the first <span> inside the page's <h1>:
# the part before the first " | " separator, stripped of surrounding
# whitespace. The result (including nil) is memoized.
#
# @errors[:id] is only rewritten when it is already truthy; it then becomes
# true if the span is missing and false otherwise.
#
# @return [String, nil] the identifier, or nil when the span is missing or
#   its text is empty
def id
  return @id if defined?(@id)

  did = @doc.at("//h1/span[1]")
  @errors[:id] &&= did.nil?
  # "".split(" | ") is [], so `first` can be nil; `&.` keeps an empty span
  # from raising NoMethodError.
  @id = did && did.text.split(" | ").first&.strip
end
#isoref ⇒ String
Create ISO reference identifier with English language.
138 139 140 141 |
# File 'lib/relaton_iso/scrapper.rb', line 138
#
# Create ISO reference identifier with English language.
#
# Copies the attributes of #pubid except :typed_stage into a new identifier
# with language "en" and renders it in the :ref_num_short format.
#
# @return [String]
def isoref
  attributes = pubid.to_h.reject { |key, _value| key == :typed_stage }
  reference = Pubid::Iso::Identifier.create(language: "en", **attributes)
  reference.to_s(format: :ref_num_short)
end
#parse(path) ⇒ Object
rubocop:disable Metrics/AbcSize,Metrics/MethodLength
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/relaton_iso/scrapper.rb', line 67
#
# Fetches the page for +path+, stores the document and URL on the instance,
# and assembles a bibliographic item from the fetch_* helpers.
#
# @param path [Object] passed unchanged to get_page
# @return [RelatonIsoBib::IsoBibliographicItem]
def parse(path) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  @doc, @url = get_page path
  titles, abstract, langs = fetch_titles_abstract

  # Hash values are evaluated top to bottom, so the helpers run in the
  # order listed here.
  attributes = {
    docid: fetch_relaton_docids,
    docnumber: fetch_docnumber,
    edition: edition,
    language: langs.map { |entry| entry[:lang] },
    script: langs.map { |entry| script(entry[:lang]) }.uniq,
    title: titles,
    doctype: fetch_type,
    docstatus: fetch_status,
    ics: fetch_ics,
    date: fetch_dates,
    contributor: fetch_contributors,
    editorialgroup: fetch_workgroup,
    abstract: abstract,
    copyright: fetch_copyright,
    link: fetch_link(@url),
    relation: fetch_relations,
    place: ["Geneva"],
    structuredidentifier: fetch_structuredidentifier,
  }
  RelatonIsoBib::IsoBibliographicItem.new(**attributes)
end
#pubid ⇒ Object
101 102 103 104 105 106 107 108 109 |
# File 'lib/relaton_iso/scrapper.rb', line 101
#
# Parsed publication identifier for #id. The outcome is memoized, whether
# it succeeded or failed.
#
# When the parsed identifier has a base, the root's edition is filled in
# from #edition unless it is already set.
#
# Any StandardError raised while parsing or back-filling the edition is
# reported through Util.error and the method returns nil. The instance
# variable is assigned only after every step succeeded, so a failure never
# leaves a half-built identifier behind, and the error is logged once
# rather than on every call.
#
# @return [Object, nil] the value returned by Pubid::Iso::Identifier.parse,
#   or nil when parsing failed
def pubid
  return @pubid if defined?(@pubid)

  parsed = Pubid::Iso::Identifier.parse(id)
  parsed.root.edition ||= edition if parsed.base
  @pubid = parsed
rescue StandardError => e
  Util.error "Failed to parse pubid from #{id}: #{e.message}"
  # Explicit nil: the caller must not receive Util.error's return value.
  @pubid = nil
end