Module: RelatonIso::Scrapper
- Defined in:
- lib/relaton_iso/scrapper.rb
Overview
Scrapper for ISO standard web pages. (The source comment also carries the lint directive `rubocop:disable Metrics/ModuleLength`.)
Constant Summary collapse
- DOMAIN =
"https://www.iso.org"
- TYPES =
{
  "TS" => "technical-specification",
  "TR" => "technical-report",
  "PAS" => "publicly-available-specification",
  # "AWI" => "approvedWorkItem",
  # "CD" => "committeeDraft",
  # "FDIS" => "finalDraftInternationalStandard",
  # "NP" => "newProposal",
  # "DIS" => "draftInternationalStandard",
  # "WD" => "workingDraft",
  # "R" => "recommendation",
  "Guide" => "guide",
}.freeze
- STGABBR =
{ "00" => "NWIP", "10" => "AWI", "20" => "WD", "30" => "CD", "40" => "DIS", "50" => "FDIS", "60" => { "00" => "PRF", "60" => "FINAL" }, }.freeze
- PUBLISHERS =
{ "IEC" => { name: "International Electrotechnical Commission", url: "www.iec.ch" }, "ISO" => { name: "International Organization for Standardization", url: "www.iso.org" }, "IEEE" => { name: "Institute of Electrical and Electronics Engineers", url: "www.ieee.org" }, "SAE" => { name: "SAE International", url: "www.sae.org" }, "CIE" => { name: " International Commission on Illumination", url: "cie.co.at" }, "ASME" => { name: "American Society of Mechanical Engineers", url: "www.asme.org" }, }.freeze
Class Method Summary collapse
-
.parse_page(hit_data, lang = nil) ⇒ RelatonIsoBib::IsoBibliographicItem
Parse page.
Class Method Details
.parse_page(hit_data, lang = nil) ⇒ RelatonIsoBib::IsoBibliographicItem
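Parse page: fetch the standard's page for a search hit and build a bibliographic item from it. (The source comment also carries the lint directive `rubocop:disable Metrics/AbcSize, Metrics/MethodLength`.)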
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/relaton_iso/scrapper.rb', line 58

# Fetch the page for one search hit and build a bibliographic item from it.
#
# @param hit_data [Hash] search hit (Hash-like); the string keys "splitPath",
#   "csnumber" and "docRef" are read here
# @param lang [String, nil] passed straight through to fetch_titles_abstract
#   (presumably a language filter -- confirm against that helper)
# @return [RelatonIsoBib::IsoBibliographicItem] item assembled from the page
def parse_page(hit_data, lang = nil)
  split_path = hit_data["splitPath"]
  csnumber = hit_data["csnumber"]
  doc, url = get_page "/contents/data/standard#{split_path}/#{csnumber}.html"

  # Edition: the first run of digits in the text of the last child of the
  # element wrapping the <strong> label that contains 'Edition'. Every step
  # is nil-safe, so the edition is nil when the page or the label is absent.
  edition_parent = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
  edition_text = edition_parent&.children&.last&.text
  edition = edition_text&.match(/\d+/)&.to_s

  titles, abstract, langs = fetch_titles_abstract(doc, lang)
  doc_ref = hit_data["docRef"]

  # Hash entries are evaluated top to bottom, so the helpers run in the
  # same order as they would as direct keyword arguments.
  attrs = {
    fetched: Date.today.to_s,
    docid: fetch_docid(doc_ref),
    docnumber: fetch_docnumber(doc),
    edition: edition,
    language: langs.map { |entry| entry[:lang] },
    script: langs.map { |entry| script(entry[:lang]) }.uniq,
    title: titles,
    doctype: fetch_type(doc_ref),
    docstatus: fetch_status(doc),
    ics: fetch_ics(doc),
    date: fetch_dates(doc, doc_ref),
    contributor: fetch_contributors(doc_ref),
    editorialgroup: fetch_workgroup(doc),
    abstract: abstract,
    copyright: fetch_copyright(doc_ref, doc),
    link: fetch_link(doc, url),
    relation: fetch_relations(doc),
    place: ["Geneva"],
    structuredidentifier: fetch_structuredidentifier(doc),
  }

  RelatonIsoBib::IsoBibliographicItem.new(**attrs)
end