Class: RelatonIec::DataParser
- Inherits:
-
Object
- Object
- RelatonIec::DataParser
- Defined in:
- lib/relaton_iec/data_parser.rb
Constant Summary collapse
- DOMAIN =
"https://webstore.iec.ch"
- ATTRS =
%i[ docid structuredidentifier language script title doctype ics date contributor editorialgroup abstract copyright link relation ].freeze
- ABBREVS =
{ "ISO" => ["International Organization for Standardization", "www.iso.org"], "IEC" => ["International Electrotechnical Commission", "www.iec.ch"], "IEEE" => ["Institute of Electrical and Electronics Engineers", "www.ieee.org"], "ASTM" => ["American Society of Testing Materials", "www.astm.org"], "CISPR" => ["International special committee on radio interference", "www.iec.ch"], }.freeze
Instance Method Summary collapse
-
#abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
-
#contributor ⇒ Array<Hash>
Parse contributors.
- #copyright ⇒ Array<Hash>
-
#create_relations(doc) ⇒ Array<Hash>
Create relations.
-
#date ⇒ Array<RelatonBib::BibliographicDate>
Parse dates.
-
#docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifiers.
-
#doctype ⇒ String
Parse document type.
-
#editorialgroup ⇒ Hash
Parse editorial group.
-
#ics ⇒ Array<RelatonIsoBib::Ics>
Fetch ICS.
-
#initialize(pub) ⇒ DataParser
constructor
Initialize new instance.
-
#lang_to_script(lang) ⇒ String
Detect script.
-
#language ⇒ Array<String>
Parse languages.
-
#link ⇒ Array<RelatonBib::TypedUri>
Parse links.
-
#parse ⇒ RelatonIec::IecBibliographicItem
Parse document.
-
#relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
-
#script ⇒ Array<String>
Parse scripts.
-
#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier
Parse structured identifier.
-
#title ⇒ Array<RelatonBib::TypedTitleString>
Parse titles.
-
#urn_id ⇒ String
Extract URN ID from URN.
Constructor Details
#initialize(pub) ⇒ DataParser
Initialize new instance.
23 24 25 |
# File 'lib/relaton_iec/data_parser.rb', line 23

# Initialize new instance.
#
# Stores the publication data so the parsing methods can read it via @pub.
#
# @param pub [Hash] publication data (the other methods index it by string
#   keys such as "reference" and "title")
def initialize(pub)
  @pub = pub
end
Instance Method Details
#abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
138 139 140 141 142 143 144 145 |
# File 'lib/relaton_iec/data_parser.rb', line 138

# Parse abstract.
#
# Converts every entry under the "abstract" key into a formatted string,
# deriving the script from the entry's language.
#
# @return [Array<RelatonBib::FormattedString>, nil] nil when the publication
#   has no "abstract" entry
def abstract
  entries = @pub["abstract"]
  return if entries.nil?

  entries.map do |entry|
    lang = entry["lang"]
    RelatonBib::FormattedString.new(
      content: entry["content"],
      language: lang,
      script: lang_to_script(lang),
      format: entry["format"],
    )
  end
end
#contributor ⇒ Array<Hash>
Parse contributors.
197 198 199 200 201 202 203 |
# File 'lib/relaton_iec/data_parser.rb', line 197

# Parse contributors.
#
# Takes the part of the reference before the first whitespace, splits it on
# "/" and turns every abbreviation into a publisher entry. Name and URL are
# looked up in ABBREVS and are nil for abbreviations not listed there.
#
# @return [Array<Hash>] one hash per publisher with :entity and :role keys
def contributor
  abbreviations = @pub["reference"].sub(/\s.*/, "").split("/")
  abbreviations.each_with_object([]) do |abbrev, publishers|
    name, url = ABBREVS[abbrev]
    organization = { name: name, url: url, abbreviation: abbrev }
    publishers << { entity: organization, role: [{ type: "publisher" }] }
  end
end
#copyright ⇒ Array<Hash>
148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/relaton_iec/data_parser.rb', line 148

# Parse copyright.
#
# The copyright year is the four digits following ":" in the reference; when
# the reference carries no year, the first four-digit run of "releaseDate" is
# used instead. Owners are derived from the publisher prefix of the reference
# (everything before the first whitespace, split on "/"), the same way
# #contributor derives publishers, so a reference without whitespace still
# yields its owner.
#
# @return [Array<Hash>] a single hash with :owner and :from, or an empty
#   array when no year can be determined
def copyright
  reference = @pub["reference"]
  # String#[] returns nil when nothing matches, so `||` falls through to the
  # release date; #to_s keeps a missing release date from raising.
  from = reference[/(?<=:)\d{4}/] || @pub["releaseDate"].to_s[/\d{4}/]
  return [] unless from

  owner = reference.sub(/\s.*/, "").split("/").map do |abbrev|
    name, url = ABBREVS[abbrev]
    { name: name, abbreviation: abbrev, url: url }
  end
  [{ owner: owner, from: from }]
end
#create_relations(doc) ⇒ Array<Hash>
Create relations.
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
# File 'lib/relaton_iec/data_parser.rb', line 272

# Create relations.
#
# Selects every ROW element whose STATUS is neither "PREPARING" nor
# "PUBLISHED" and builds a relation from it. The lower-cased status is the
# relation type, except that "revised"/"replaced" become "updates" and
# "withdrawn" becomes "obsoletes". The related item is identified by the
# row's FULL_NAME text.
#
# @param doc [#xpath] parsed XML document (#relation passes a Nokogiri document)
#
# @return [Array<RelatonBib::DocumentRelation>] one relation per selected row
def create_relations(doc) # rubocop:disable Metrics/MethodLength
  type_by_status = {
    "revised" => "updates",
    "replaced" => "updates",
    "withdrawn" => "obsoletes",
  }
  rows = doc.xpath('//ROW[STATUS[.!="PREPARING" and .!="PUBLISHED"]]')
  rows.map do |row|
    status = row.at("STATUS").text.downcase
    # Statuses without a mapping are used as the relation type unchanged.
    type = type_by_status.fetch(status, status)
    full_name = row.at("FULL_NAME").text
    bibitem = IecBibliographicItem.new(
      formattedref: RelatonBib::FormattedRef.new(content: full_name, format: "text/plain"),
      docid: [RelatonBib::DocumentIdentifier.new(id: full_name, type: "IEC", primary: true)],
    )
    RelatonBib::DocumentRelation.new(type: type, bibitem: bibitem)
  end
end
#date ⇒ Array<RelatonBib::BibliographicDate>
Parse dates.
179 180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/relaton_iec/data_parser.rb', line 179

# Parse dates.
#
# Builds one bibliographic date for every known date field that holds a
# truthy value in the publication data; the date type is taken from the
# mapping below.
#
# @return [Array<RelatonBib::BibliographicDate>] empty when none of the date
#   fields is set
def date
  field_by_type = {
    "published" => "publicationDate",
    "stable-until" => "stabilityDate",
    "confirmed" => "confirmationDate",
    "obsoleted" => "dateOfWithdrawal",
  }
  present = field_by_type.select { |_type, field| @pub[field] }
  present.map do |type, field|
    RelatonBib::BibliographicDate.new(type: type, on: @pub[field])
  end
end
#docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifiers.
46 47 48 49 50 51 |
# File 'lib/relaton_iec/data_parser.rb', line 46

# Parse document identifiers.
#
# Produces two identifiers: the primary "IEC" identifier taken from the
# reference, and a "URN" identifier built from the first element of "urnAlt"
# prefixed with "urn:".
#
# @return [Array<RelatonBib::DocumentIdentifier>] primary identifier first,
#   URN identifier second
def docid
  primary = RelatonBib::DocumentIdentifier.new(
    id: @pub["reference"], type: "IEC", primary: true,
  )
  urn = RelatonBib::DocumentIdentifier.new(
    id: "urn:#{@pub['urnAlt'][0]}", type: "URN",
  )
  [primary, urn]
end
#doctype ⇒ String
Parse document type.
235 236 237 238 239 240 241 242 243 244 |
# File 'lib/relaton_iec/data_parser.rb', line 235

# Parse document type.
#
# Maps the "stdType" code to its document type name. Codes without a mapping
# are returned lower-cased.
#
# @return [String, nil] document type, or nil when "stdType" is missing
def doctype
  std_type = @pub["stdType"]
  case std_type
  when "IS" then "international-standard"
  when "TR" then "technical-report"
  when "TS" then "technical-specification"
  when "PAS" then "publicly-available-specification"
  when "SRD" then "system-reference-deliverable"
  # Safe navigation: a publication without "stdType" yields nil instead of
  # raising NoMethodError.
  else std_type&.downcase
  end
end
#editorialgroup ⇒ Hash
Parse editorial group.
120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/relaton_iec/data_parser.rb', line 120

# Parse editorial group.
#
# Describes the committee as a single technical committee entry. The
# committee number is the first run of digits in the committee reference.
#
# @return [Hash, nil] hash with a :technical_committee array, or nil when the
#   publication has no "committee" entry
def editorialgroup
  committee = @pub["committee"]
  return unless committee

  name = committee["reference"]
  # String#[] with a regexp returns nil when there are no digits, which
  # leaves the number nil.
  number = name[/\d+/]&.to_i
  { technical_committee: [{ name: name, type: "technicalCommittee", number: number }] }
end
#ics ⇒ Array<RelatonIsoBib::Ics>
Fetch ICS.
166 167 168 169 170 171 172 |
# File 'lib/relaton_iec/data_parser.rb', line 166

# Fetch ICS.
#
# Builds an ICS object from the "value" of every classification whose "type"
# is "ICS"; classifications of other types are ignored.
#
# @return [Array<RelatonIsoBib::Ics>] empty when the publication has no
#   "classifications" entry
def ics
  classifications = @pub["classifications"]
  return [] unless classifications

  classifications.each_with_object([]) do |entry, codes|
    codes << RelatonIsoBib::Ics.new(entry["value"]) if entry["type"] == "ICS"
  end
end
#lang_to_script(lang) ⇒ String
Detect script.
96 97 98 99 100 |
# File 'lib/relaton_iec/data_parser.rb', line 96

# Detect script.
#
# @param lang [String] language code
#
# @return [String, nil] "Latn" for "en", "fr" and "es"; nil for anything else
def lang_to_script(lang)
  "Latn" if %w[en fr es].include?(lang)
end
#language ⇒ Array<String>
Parse languages.
73 74 75 |
# File 'lib/relaton_iec/data_parser.rb', line 73

# Parse languages.
#
# Collects the "lang" value of every title, keeping the first occurrence of
# each language.
#
# @return [Array<String>] distinct title languages in order of appearance
def language
  codes = @pub["title"].map { |entry| entry["lang"] }
  codes.uniq
end
#link ⇒ Array<RelatonBib::TypedUri>
Parse links.
210 211 212 213 214 215 216 217 218 219 |
# File 'lib/relaton_iec/data_parser.rb', line 210

# Parse links.
#
# The first link is always the "src" link to the publication page, built
# from DOMAIN and the URN ID. Every release item of type "PREVIEW" adds an
# "obp" link to its preview file.
#
# @return [Array<RelatonBib::TypedUri>] source link followed by preview links
def link
  source = RelatonBib::TypedUri.new(
    content: "#{DOMAIN}/publication/#{urn_id}", type: "src",
  )
  previews = RelatonBib.array(@pub["releaseItems"]).select do |item|
    item["type"] == "PREVIEW"
  end
  previews.each_with_object([source]) do |item, uris|
    preview_url = "#{DOMAIN}/preview/#{item['contentRef']['fileName']}"
    uris << RelatonBib::TypedUri.new(content: preview_url, type: "obp")
  end
end
#parse ⇒ RelatonIec::IecBibliographicItem
Parse document.
32 33 34 35 36 37 38 39 |
# File 'lib/relaton_iec/data_parser.rb', line 32

# Parse document.
#
# Calls the parser method named by each entry of ATTRS and collects the
# results as constructor arguments, then adds the document status, edition,
# price code and place of publication.
#
# @return [RelatonIec::IecBibliographicItem] bibliographic item built from
#   the publication data
def parse # rubocop:disable Metrics/AbcSize
  args = ATTRS.each_with_object({}) { |a, h| h[a] = send a }
  args[:docstatus] = RelatonBib::DocumentStatus.new stage: @pub["status"]
  args[:edition] = @pub["edition"]
  # dig yields nil instead of raising when the publication has no "priceInfo".
  args[:price_code] = @pub.dig("priceInfo", "priceCode")
  args[:place] = ["Geneva"]
  IecBibliographicItem.new(**args)
end
#relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
251 252 253 254 255 256 257 258 259 260 261 262 263 |
# File 'lib/relaton_iec/data_parser.rb', line 251

# Parse relation.
#
# Requests the relation XML for this publication from the webstore and
# converts it with #create_relations. Any StandardError restarts the whole
# request; the third failure is re-raised to the caller.
#
# @return [Array<RelatonBib::DocumentRelation>] relations of the publication
#
# @raise [StandardError] the error of the third failed attempt
def relation # rubocop:disable Metrics/MethodLength
  failures = 0
  begin
    endpoint = URI "#{DOMAIN}/webstore/webstore.nsf/AjaxRequestXML?" \
                   "Openagent&url=#{urn_id}"
    response = Net::HTTP.get_response(endpoint)
    create_relations(Nokogiri::XML(response.body))
  rescue StandardError => e
    failures += 1
    raise e if failures >= 3

    retry
  end
end
#script ⇒ Array<String>
Parse scripts.
82 83 84 85 86 87 |
# File 'lib/relaton_iec/data_parser.rb', line 82

# Parse scripts.
#
# Maps every title language to its script, dropping languages without a
# known script and repeated scripts.
#
# @return [Array<String>] distinct scripts in order of first appearance
def script
  language.map { |lang| lang_to_script(lang) }.compact.uniq
end
#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier
Parse structured identifier.
58 59 60 61 62 63 64 65 66 |
# File 'lib/relaton_iec/data_parser.rb', line 58

# Parse structured identifier.
#
# Reads the project number and the optional part and subpart from the first
# whitespace-preceded token of the reference, where the three are separated
# by "-".
#
# @return [RelatonIsoBib::StructuredIdentifier] identifier of type "IEC"
#   whose id is the full reference
def structuredidentifier
  reference = @pub["reference"]
  numbers = reference.match(
    /(?<=\s)(?<project>\w+)(?:-(?<part>\w*)(?:-(?<subpart>\w*))?)?/,
  )
  RelatonIsoBib::StructuredIdentifier.new(
    type: "IEC",
    id: reference,
    project_number: numbers[:project],
    part: numbers[:part],
    subpart: numbers[:subpart],
  )
end
#title ⇒ Array<RelatonBib::TypedTitleString>
Parse titles.
107 108 109 110 111 112 113 |
# File 'lib/relaton_iec/data_parser.rb', line 107

# Parse titles.
#
# Turns every entry under the "title" key into a title of type "main", with
# the script derived from the entry's language.
#
# @return [Array<RelatonBib::TypedTitleString>] one title per entry
def title
  @pub["title"].map do |entry|
    lang = entry["lang"]
    RelatonBib::TypedTitleString.new(
      type: "main",
      content: entry["value"],
      language: lang,
      script: lang_to_script(lang),
    )
  end
end
#urn_id ⇒ String
Extract URN ID from URN.
226 227 228 |
# File 'lib/relaton_iec/data_parser.rb', line 226

# Extract URN ID from URN.
#
# @return [String] the last ":"-separated segment of the "urn" value
def urn_id
  segments = @pub["urn"].split(":")
  segments.last
end