Class: RelatonIec::DataParser

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_iec/data_parser.rb

Constant Summary collapse

# Base URL of the IEC webstore; prefix for the publication, preview and
# relation-lookup URLs built by this parser.
DOMAIN = "https://webstore.iec.ch"
# Names of the parser methods that #parse invokes via `send`; each result is
# stored under the same key in the bibliographic item arguments.
# Must be a symbol-array literal (`%i[...]`) - a bare `i[...]` is not valid here.
ATTRS = %i[
  docid structuredidentifier language script title doctype
  ics date contributor editorialgroup abstract copyright link relation
].freeze
# Maps an organization abbreviation, as it appears in a document reference,
# to a two-element array: [full organization name, organization URL].
ABBREVS = {
  "ISO" => ["International Organization for Standardization", "www.iso.org"],
  "IEC" => ["International Electrotechnical Commission", "www.iec.ch"],
  "IEEE" => ["Institute of Electrical and Electronics Engineers", "www.ieee.org"],
  "ASTM" => ["American Society of Testing Materials", "www.astm.org"],
  "CISPR" => ["International special committee on radio interference", "www.iec.ch"],
}.freeze

Instance Method Summary collapse

Constructor Details

#initialize(pub) ⇒ DataParser

Initialize new instance.

Parameters:

  • pub (Hash)

    document data



23
24
25
# File 'lib/relaton_iec/data_parser.rb', line 23

# Create a parser bound to one publication record.
#
# @param pub [Hash] document data; kept as-is and read by every parse method
def initialize(pub)
  @pub = pub
end

Instance Method Details

#abstract ⇒ Array<RelatonBib::FormattedString>

Parse abstract.

Returns:

  • (Array<RelatonBib::FormattedString>)

    abstract



138
139
140
141
142
143
144
145
# File 'lib/relaton_iec/data_parser.rb', line 138

# Parse abstract.
#
# Each entry of @pub["abstract"] is converted using its "content", "lang"
# and "format" keys; the script is derived from the language.
#
# @return [Array<RelatonBib::FormattedString>, nil] abstracts, or nil when
#   the publication data has no "abstract" entry
def abstract
  entries = @pub["abstract"]
  return if entries.nil?

  entries.map do |entry|
    lang = entry["lang"]
    RelatonBib::FormattedString.new(
      content: entry["content"], language: lang,
      script: lang_to_script(lang), format: entry["format"]
    )
  end
end

#contributor ⇒ Array<Hash>

Parse contributors.

Returns:

  • (Array<Hash>)

    contributors



197
198
199
200
201
202
203
# File 'lib/relaton_iec/data_parser.rb', line 197

# Parse contributors.
#
# The publisher abbreviations are the "/"-separated tokens of the reference
# text that precedes its first whitespace (e.g. "ISO/IEC 123" -> ISO, IEC).
# Name and URL come from ABBREVS and are nil for unknown abbreviations.
#
# @return [Array<Hash>] one publisher hash per abbreviation
def contributor
  publishers = @pub["reference"].sub(/\s.*/, "").split("/")
  publishers.map do |abbrev|
    name, url = ABBREVS[abbrev]
    entity = { name: name, url: url, abbreviation: abbrev }
    { entity: entity, role: [{ type: "publisher" }] }
  end
end

#copyright ⇒ Array<Hash>

Returns:

  • (Array<Hash>)


148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/relaton_iec/data_parser.rb', line 148

# Parse copyright.
#
# The year is the four digits following ":" in the reference; when the
# reference carries none, the first four-digit run of "releaseDate" is used.
# Owners are the "/"-separated abbreviations that precede the first
# whitespace of the reference, resolved through ABBREVS.
#
# @return [Array<Hash>] a single copyright hash, or an empty array when no
#   year can be determined
def copyright # rubocop:disable Metrics/AbcSize
  reference = @pub["reference"]
  year = reference[/(?<=:)\d{4}/].to_s
  year = @pub["releaseDate"]&.match(/\d{4}/).to_s if year.empty?
  return [] if year.empty?

  owners = reference[/.*?(?=\s)/].to_s.split("/").map do |abbrev|
    name, url = ABBREVS[abbrev]
    { name: name, abbreviation: abbrev, url: url }
  end
  [{ owner: owners, from: year }]
end

#create_relations(doc) ⇒ Array<Hash>

Create relations.

Parameters:

  • doc (Nokogiri::XML::Document)

    XML document

Returns:

  • (Array<Hash>)

    relations



272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# File 'lib/relaton_iec/data_parser.rb', line 272

# Create relations.
#
# Every ROW whose STATUS is neither "PREPARING" nor "PUBLISHED" becomes one
# relation. The downcased status is the relation type, except that
# "revised"/"replaced" map to "updates" and "withdrawn" maps to "obsoletes".
# The related item is identified by the row's FULL_NAME text.
#
# @param doc [Nokogiri::XML::Document] XML document
#
# @return [Array<RelatonBib::DocumentRelation>] relations
def create_relations(doc) # rubocop:disable Metrics/MethodLength
  type_by_status = {
    "revised" => "updates", "replaced" => "updates", "withdrawn" => "obsoletes"
  }
  rows = doc.xpath('//ROW[STATUS[.!="PREPARING" and .!="PUBLISHED"]]')
  rows.map do |row|
    status = row.at("STATUS").text.downcase
    name = row.at("FULL_NAME").text
    formattedref = RelatonBib::FormattedRef.new(content: name, format: "text/plain")
    identifier = RelatonBib::DocumentIdentifier.new(id: name, type: "IEC", primary: true)
    item = IecBibliographicItem.new(formattedref: formattedref, docid: [identifier])
    RelatonBib::DocumentRelation.new(type: type_by_status.fetch(status, status), bibitem: item)
  end
end

#date ⇒ Array<RelatonBib::BibliographicDate>

Parse dates.

Returns:

  • (Array<RelatonBib::BibliographicDate>)

    dates



179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/relaton_iec/data_parser.rb', line 179

# Parse dates.
#
# Each date type is read from its publication field; types whose field is
# missing (nil or false) are skipped.
#
# @return [Array<RelatonBib::BibliographicDate>] dates
def date
  field_by_type = {
    "published" => "publicationDate",
    "stable-until" => "stabilityDate",
    "confirmed" => "confirmationDate",
    "obsoleted" => "dateOfWithdrawal",
  }
  present = field_by_type.select { |_type, field| @pub[field] }
  present.map do |type, field|
    RelatonBib::BibliographicDate.new(type: type, on: @pub[field])
  end
end

#docid ⇒ Array<RelatonBib::DocumentIdentifier>

Parse document identifiers.

Returns:

  • (Array<RelatonBib::DocumentIdentifier>)

    document identifiers



46
47
48
49
50
51
# File 'lib/relaton_iec/data_parser.rb', line 46

# Parse document identifiers.
#
# Produces the primary "IEC" identifier from the reference, followed by a
# "URN" identifier built from the first "urnAlt" entry prefixed with "urn:".
#
# @return [Array<RelatonBib::DocumentIdentifier>] document identifiers
def docid
  primary = RelatonBib::DocumentIdentifier.new(id: @pub["reference"], type: "IEC", primary: true)
  urn = RelatonBib::DocumentIdentifier.new(id: "urn:#{@pub['urnAlt'][0]}", type: "URN")
  [primary, urn]
end

#doctype ⇒ String

Parse document type.

Returns:

  • (String)

    document type



235
236
237
238
239
240
241
242
243
244
# File 'lib/relaton_iec/data_parser.rb', line 235

# Parse document type.
#
# Known "stdType" codes are expanded to their full document type name; any
# other code is returned downcased.
#
# @return [String] document type
def doctype
  case @pub["stdType"]
  when "IS" then "international-standard"
  when "TR" then "technical-report"
  when "TS" then "technical-specification"
  when "PAS" then "publicly-available-specification"
  # Was misspelled "system-reference-delivrabble", which is not a valid doctype.
  when "SRD" then "system-reference-deliverable"
  else @pub["stdType"].downcase
  end
end

#editorialgroup ⇒ Hash

Parse editorial group.

Returns:

  • (Hash)

    editorial group



120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/relaton_iec/data_parser.rb', line 120

# Parse editorial group.
#
# The committee's "reference" is used as the group name; its first run of
# digits, if any, becomes the integer committee number.
#
# @return [Hash, nil] editorial group, or nil when the publication has no
#   "committee" entry
def editorialgroup
  committee = @pub["committee"]
  return unless committee

  name = committee["reference"]
  number = name[/\d+/]&.to_i
  { technical_committee: [{ name: name, type: "technicalCommittee", number: number }] }
end

#ics ⇒ Array<RelatonIsoBib::Ics>

Fetch ICS.

Returns:

  • (Array<RelatonIsoBib::Ics>)

    ics



166
167
168
169
170
171
172
# File 'lib/relaton_iec/data_parser.rb', line 166

# Fetch ICS.
#
# Only classifications whose "type" is "ICS" are converted; each one's
# "value" is passed to RelatonIsoBib::Ics.
#
# @return [Array<RelatonIsoBib::Ics>] ics; empty when the publication has no
#   "classifications" entry
def ics
  classifications = @pub["classifications"]
  return [] unless classifications

  classifications.each_with_object([]) do |classification, codes|
    next unless classification["type"] == "ICS"

    codes << RelatonIsoBib::Ics.new(classification["value"])
  end
end

#lang_to_script(lang) ⇒ String

Detect script.

Parameters:

  • lang (String)

    language

Returns:

  • (String)

    script



96
97
98
99
100
# File 'lib/relaton_iec/data_parser.rb', line 96

# Detect script.
#
# @param lang [String] language code
#
# @return [String, nil] "Latn" for "en", "fr" and "es"; nil for anything else
def lang_to_script(lang)
  "Latn" if %w[en fr es].include?(lang)
end

#language ⇒ Array<String>

Parse languages.

Returns:

  • (Array<String>)

    languages



73
74
75
# File 'lib/relaton_iec/data_parser.rb', line 73

# Parse languages.
#
# @return [Array<String>] distinct "lang" values of the publication titles,
#   in order of first appearance
def language
  distinct = @pub["title"].uniq { |entry| entry["lang"] }
  distinct.map { |entry| entry["lang"] }
end

#link ⇒ Array<RelatonBib::TypedUri>

Parse links.

Returns:

  • (Array<RelatonBib::TypedUri>)

    links



210
211
212
213
214
215
216
217
218
219
# File 'lib/relaton_iec/data_parser.rb', line 210

# Parse links.
#
# The first link is always the "src" link to the publication page. One "obp"
# link follows for each release item of type "PREVIEW", pointing at the
# preview file named in its "contentRef".
#
# @return [Array<RelatonBib::TypedUri>] links
def link
  source = RelatonBib::TypedUri.new(content: "#{DOMAIN}/publication/#{urn_id}", type: "src")
  previews = RelatonBib.array(@pub["releaseItems"]).select { |item| item["type"] == "PREVIEW" }
  preview_links = previews.map do |item|
    preview_url = "#{DOMAIN}/preview/#{item['contentRef']['fileName']}"
    RelatonBib::TypedUri.new(content: preview_url, type: "obp")
  end
  [source, *preview_links]
end

#parse ⇒ RelatonIec::IecBibliographicItem

Parse document.

Returns:

  • (RelatonIec::IecBibliographicItem)



32
33
34
35
36
37
38
39
# File 'lib/relaton_iec/data_parser.rb', line 32

# Parse document.
#
# Calls every parser method named in ATTRS and passes the results, together
# with the document status, edition, price code and the fixed place
# "Geneva", to IecBibliographicItem.
#
# @return [RelatonIec::IecBibliographicItem] bibliographic item
def parse # rubocop:disable Metrics/AbcSize
  parsed = ATTRS.map { |name| [name, send(name)] }.to_h
  extras = {
    docstatus: RelatonBib::DocumentStatus.new(stage: @pub["status"]),
    edition: @pub["edition"],
    price_code: @pub["priceInfo"]["priceCode"],
    place: ["Geneva"],
  }
  IecBibliographicItem.new(**parsed.merge(extras))
end

#relation ⇒ Array<RelatonBib::DocumentRelation>

Parse relation.

Returns:

  • (Array<RelatonBib::DocumentRelation>)

    relation



251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/relaton_iec/data_parser.rb', line 251

# Parse relation.
#
# Requests the relation XML for this publication's URN ID from the webstore
# and converts it with #create_relations. Any StandardError restarts the
# whole request; the third failure is re-raised to the caller.
#
# @return [Array<RelatonBib::DocumentRelation>] relation
def relation # rubocop:disable Metrics/MethodLength
  attempts = 0
  begin
    query = "Openagent&url=#{urn_id}"
    uri = URI "#{DOMAIN}/webstore/webstore.nsf/AjaxRequestXML?#{query}"
    response = Net::HTTP.get_response(uri)
    create_relations Nokogiri::XML(response.body)
  rescue StandardError => e
    attempts += 1
    raise e if attempts >= 3

    retry
  end
end

#script ⇒ Array<String>

Parse scripts.

Returns:

  • (Array<String>)

    scripts



82
83
84
85
86
87
# File 'lib/relaton_iec/data_parser.rb', line 82

# Parse scripts.
#
# @return [Array<String>] distinct scripts of the document languages, in
#   order of first appearance; languages without a known script are skipped
def script
  language.map { |lang| lang_to_script(lang) }.compact.uniq
end

#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier

Parse structured identifier.

Returns:

  • (RelatonIsoBib::StructuredIdentifier)

    structured identifier



58
59
60
61
62
63
64
65
66
# File 'lib/relaton_iec/data_parser.rb', line 58

# Parse structured identifier.
#
# The project number is the first word that follows whitespace in the
# reference; optional "-part" and "-subpart" suffixes are captured too.
#
# @return [RelatonIsoBib::StructuredIdentifier] structured identifier
def structuredidentifier
  reference = @pub["reference"]
  parts = reference.match(
    /(?<=\s)(?<project>\w+)(?:-(?<part>\w*)(?:-(?<subpart>\w*))?)?/,
  )
  RelatonIsoBib::StructuredIdentifier.new(
    type: "IEC", id: reference,
    project_number: parts[:project], part: parts[:part], subpart: parts[:subpart]
  )
end

#title ⇒ Array<RelatonBib::TypedTitleString>

Parse titles.

Returns:

  • (Array<RelatonBib::TypedTitleString>)

    titles



107
108
109
110
111
112
113
# File 'lib/relaton_iec/data_parser.rb', line 107

# Parse titles.
#
# Every entry of @pub["title"] becomes a title of type "main" whose content
# is the entry's "value"; the script is derived from the entry's "lang".
#
# @return [Array<RelatonBib::TypedTitleString>] titles
def title
  @pub["title"].map do |entry|
    lang = entry["lang"]
    RelatonBib::TypedTitleString.new(
      type: "main", content: entry["value"], language: lang, script: lang_to_script(lang),
    )
  end
end

#urn_id ⇒ String

Extract URN ID from URN.

Returns:

  • (String)

    URN ID



226
227
228
# File 'lib/relaton_iec/data_parser.rb', line 226

# Extract URN ID from URN.
#
# @return [String, nil] the last ":"-separated segment of @pub["urn"];
#   nil when the URN is an empty string
def urn_id
  segments = @pub["urn"].split(":")
  segments[-1]
end