Module: RelatonIso::Scrapper
Overview
Scrapper.
Constant Summary collapse
# rubocop:disable Metrics/ModuleLength

# Base URL of the ISO website.
DOMAIN = "https://www.iso.org"

# Maps a document-type abbreviation to a document type name.
# The commented-out entries are kept from the original listing; they are
# disabled and therefore not part of the hash.
TYPES = {
  "TS" => "technical-specification",
  "DTS" => "technical-specification",
  "TR" => "technical-report",
  "DTR" => "technical-report",
  "PAS" => "publicly-available-specification",
  # "AWI" => "approvedWorkItem",
  # "CD" => "committeeDraft",
  # "FDIS" => "finalDraftInternationalStandard",
  # "NP" => "newProposal",
  # "DIS" => "draftInternationalStandard",
  # "WD" => "workingDraft",
  # "R" => "recommendation",
  "Guide" => "guide",
  "ISO" => "international-standard",
  "IEC" => "international-standard",
  "IWA" => "international-workshop-agreement",
}.freeze

# Maps a two-digit code to a stage abbreviation. Code "60" maps to a nested
# hash keyed by a second two-digit code (presumably the substage - confirm
# against the code that reads this table).
STGABBR = {
  "00" => "NWIP",
  "10" => "AWI",
  "20" => "WD",
  "30" => "CD",
  "40" => "DIS",
  "50" => "FDIS",
  "60" => { "00" => "PRF", "60" => "FINAL" },
}.freeze

# Maps a publisher abbreviation to its full name and website.
PUBLISHERS = {
  "IEC" => { name: "International Electrotechnical Commission", url: "www.iec.ch" },
  "ISO" => { name: "International Organization for Standardization", url: "www.iso.org" },
  "IEEE" => { name: "Institute of Electrical and Electronics Engineers", url: "www.ieee.org" },
  "SAE" => { name: "SAE International", url: "www.sae.org" },
  # The name previously started with a stray space, which would leak into
  # any output built from it.
  "CIE" => { name: "International Commission on Illumination", url: "cie.co.at" },
  "ASME" => { name: "American Society of Mechanical Engineers", url: "www.asme.org" },
}.freeze
Instance Method Summary collapse
-
#fetch_relaton_docids(doc, pubid) ⇒ Array<RelatonBib::DocumentIdentifier>
Create document ids.
-
#isoref(pubid) ⇒ String
Create ISO reference identifier with English language.
-
#parse_page(path, lang = nil) ⇒ RelatonIsoBib::IsoBibliographicItem
Parse page.
Instance Method Details
#fetch_relaton_docids(doc, pubid) ⇒ Array<RelatonBib::DocumentIdentifier>
Create document ids.
97 98 99 100 101 102 103 104 |
# File 'lib/relaton_iso/scrapper.rb', line 97
#
# Create document ids.
#
# Assigns +pubid.stage+ from the page's stage code when the identifier has no
# stage yet, then builds three identifiers: the primary "ISO" id, an
# "iso-reference" id whose value comes from #isoref, and a "URN" id.
#
# @param doc [Object] parsed page, passed to +stage_code+
#   (presumably a Nokogiri document - confirm against the caller)
# @param pubid [Object] publication identifier; its +stage+ may be assigned here
# @return [Array<RelatonBib::DocumentIdentifier>] the three identifiers, in the
#   order ISO, iso-reference, URN
def fetch_relaton_docids(doc, pubid)
  # Must run before the identifiers are built: they are created from pubid.
  pubid.stage ||= Pubid::Iso::Identifier.parse_stage(stage_code(doc))

  primary_id = DocumentIdentifier.new(id: pubid, type: "ISO", primary: true)
  reference_id = RelatonBib::DocumentIdentifier.new(id: isoref(pubid), type: "iso-reference")
  urn_id = DocumentIdentifier.new(id: pubid, type: "URN")

  [primary_id, reference_id, urn_id]
end
#isoref(pubid) ⇒ String
Create ISO reference identifier with English language.
113 114 115 116 |
# File 'lib/relaton_iso/scrapper.rb', line 113
#
# Create ISO reference identifier with English language.
#
# Copies the identifier's attributes (everything from +pubid.to_h+ except
# +:typed_stage+), creates a new identifier from them with +language: "en"+,
# and renders it with the +:ref_num_short+ format.
#
# @param pubid [Object] publication identifier responding to +to_h+
# @return [String] the rendered reference
def isoref(pubid)
  attributes = pubid.to_h.select { |key, _value| key != :typed_stage }
  # "language" is listed first, so a :language entry in attributes would
  # take precedence over "en", exactly as in the splat order used here.
  english_id = Pubid::Iso::Identifier.create(language: "en", **attributes)
  english_id.to_s(format: :ref_num_short)
end
#parse_page(path, lang = nil) ⇒ RelatonIsoBib::IsoBibliographicItem
Parse page.
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/relaton_iso/scrapper.rb', line 57
#
# Parse page.
#
# Loads the page via +get_page+, reads the reference from the condensed
# heading, parses it into a publication identifier, and assembles the
# bibliographic item from the individual +fetch_*+ helpers.
#
# @param path [Object] page location, passed unchanged to +get_page+
# @param lang [Object, nil] passed unchanged to +fetch_titles_abstract+
# @return [RelatonIsoBib::IsoBibliographicItem]
def parse_page(path, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  doc, url = get_page path

  # The heading text is split on " | "; the part before the first separator is the reference.
  heading = doc.at("//nav[contains(@class,'heading-condensed')]/h1").text
  id = heading.split(" | ").first
  pubid = Pubid::Iso::Identifier.parse(id)

  # Fetch edition: trailing digits of the last text node in the "Edition" block, if any.
  edition_node = doc.at("//div[div[.='Edition']]/text()[last()]")
  edition = edition_node&.text&.match(/\d+$/)&.to_s
  if pubid.base
    pubid.root.edition ||= edition
  end

  titles, abstract, langs = fetch_titles_abstract(doc, lang)

  # Keep this argument order: fetch_relaton_docids may assign pubid.stage,
  # and later helpers receive the same pubid object.
  RelatonIsoBib::IsoBibliographicItem.new(
    docid: fetch_relaton_docids(doc, pubid),
    docnumber: fetch_docnumber(pubid),
    edition: edition,
    language: langs.map { |entry| entry[:lang] },
    script: langs.map { |entry| script(entry[:lang]) }.uniq,
    title: titles,
    doctype: fetch_type(id),
    docstatus: fetch_status(doc),
    ics: fetch_ics(doc),
    date: fetch_dates(doc, id),
    contributor: fetch_contributors(id),
    editorialgroup: fetch_workgroup(doc),
    abstract: abstract,
    copyright: fetch_copyright(doc),
    link: fetch_link(doc, url),
    relation: fetch_relations(doc),
    place: ["Geneva"],
    structuredidentifier: fetch_structuredidentifier(pubid),
  )
end