Class: RelatonBipm::BipmSiBrochureParser

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_bipm/bipm_si_brochure_parser.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data_fetcher) ⇒ BipmSiBrochureParser

Create new parser



8
9
10
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 8

# Create a new parser bound to a data fetcher.
#
# The fetcher is stored in @data_fetcher wrapped in a WeakRef, so this
# parser does not hold a strong reference to it.
#
# @param data_fetcher [Object] the fetcher this parser reports to
def initialize(data_fetcher)
  @data_fetcher = WeakRef.new(data_fetcher)
end

Class Method Details

.parse(data_fetcher) ⇒ Object

Parse documents from the SI brochure dataset and write them to YAML files



17
18
19
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 17

# Convenience entry point: build a parser for the given data fetcher and
# run its instance-level #parse.
#
# @param data_fetcher [Object] passed unchanged to the constructor
# @return [Object] whatever the instance #parse returns
def self.parse(data_fetcher)
  parser = new(data_fetcher)
  parser.parse
end

Instance Method Details

#deep_merge(hash1, hash2) ⇒ Hash

Deep merge two hashes



100
101
102
103
104
105
106
107
108
109
110
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 100

# Recursively merge hash2 into a copy of hash1.
#
# Conflicting keys are resolved as follows:
# * two Hashes  - merged recursively;
# * two Arrays  - concatenated (hash1 items first), then de-duplicated by
#   comparing the downcased form of each item (see downcase_all);
# * otherwise   - the hash2 value wins unless it is nil/false, in which
#   case the hash1 value is kept.
#
# @param hash1 [Hash] base hash
# @param hash2 [Hash] hash whose values take precedence
# @return [Hash] a new merged hash
def deep_merge(hash1, hash2) # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
  hash1.merge(hash2) do |_key, current, incoming|
    both_hashes = current.is_a?(Hash) && incoming.is_a?(Hash)
    next deep_merge(current, incoming) if both_hashes

    both_arrays = current.is_a?(Array) && incoming.is_a?(Array)
    if both_arrays
      combined = current + incoming
      next combined.uniq { |item| downcase_all item }
    end

    incoming || current
  end
end

#downcase_all(content) ⇒ Array, ...

Downcase all values in hash or array



119
120
121
122
123
124
125
126
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 119

# Return a downcased copy of the given structure.
#
# Strings are downcased; Arrays and Hash values are processed recursively
# (Hash keys are left untouched); any other object is returned as is.
#
# @param content [Array, Hash, String, Object] value to normalise
# @return [Array, Hash, String, Object] downcased counterpart of content
def downcase_all(content)
  return content.downcase if content.is_a?(String)
  return content.map { |element| downcase_all element } if content.is_a?(Array)
  return content.transform_values { |value| downcase_all value } if content.is_a?(Hash)

  content
end

#fix_si_brochure_id(hash) ⇒ void

This method returns an undefined value.

Update ID of SI brochure



62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 62

# Normalise the identifiers of an SI brochure document hash in place.
#
# After update_id has adjusted the docid entries, the primary ID is used to
#   * rewrite "docnumber": an existing value that is exactly "Brochure",
#     "Brochure Concise" or "Brochure FAQ" (case-insensitive) is replaced
#     by the primary ID without its leading "BIPM "; other existing values
#     are left alone; a missing value is set to that shortened primary ID;
#   * set "id" to the primary ID with commas and whitespace removed.
#
# @param hash [Hash] document hash with "docid" and optionally "docnumber"
# @return [void]
def fix_si_brochure_id(hash)
  update_id hash

  prid = primary_id hash
  short_id = prid.sub(/^BIPM\s/, "")
  docnumber = hash["docnumber"]
  if docnumber
    # Mutates the stored string itself, so the hash sees the new value.
    docnumber.sub!(/^Brochure(?:\sConcise|\sFAQ)?$/i, short_id)
  else
    hash["docnumber"] = short_id
  end
  hash["id"] = prid.gsub(/[,\s]/, "")
end

#parse ⇒ Object

Parse SI brochure documents and write them to YAML files



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 24

# Parse every "*.rxl" file under bipm-si-brochure/_site/documents and write
# the resulting bibliographic item through the data fetcher.
#
# For each file the /bibdata element is converted to a hash, its IDs are
# normalised with fix_si_brochure_id, and the entry is registered in the
# fetcher's index2. The output name is the source basename with an optional
# "-en"/"-fr" suffix and the ".rxl" extension stripped. If the output file
# already exists, its content is loaded, normalised the same way and
# deep-merged with the freshly parsed hash; the duplicate warning is
# requested only when the output file did not exist yet.
#
# @return [Array<String>] the list of paths that were iterated
def parse # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  Dir["bipm-si-brochure/_site/documents/*.rxl"].each do |path|
    puts "Parsing #{path}"
    bibdata = Nokogiri::XML(File.read(path)).at("/bibdata")
    parsed = RelatonBipm::XMLParser.from_xml(bibdata.to_xml).to_hash
    fix_si_brochure_id parsed

    output_dir = @data_fetcher.output
    stem = File.basename(path).sub(/(?:-(?:en|fr))?\.rxl$/, "")
    basename = File.join(output_dir, stem)
    outfile = "#{basename}.#{@data_fetcher.ext}"

    key = parsed["docnumber"] || basename
    @data_fetcher.index2.add_or_update Id.new.parse(key).to_hash, outfile

    already_saved = File.exist? outfile
    hash = parsed
    if already_saved
      stored = YAML.load_file outfile
      fix_si_brochure_id stored
      hash = deep_merge parsed, stored
    end

    item = RelatonBipm::BipmBibliographicItem.from_hash(**hash)
    @data_fetcher.write_file outfile, item, warn_duplicate: !already_saved
    puts "Saved to #{outfile}"
  end
end

#primary_id(hash) ⇒ Object



86
87
88
89
90
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 86

# Return the "id" of the first docid entry that is flagged primary and is
# either English ("language" == "en") or has no language set.
#
# @param hash [Hash] document hash with a "docid" array of hashes
# @return [Object] value stored under "id" of the matching entry
# @raise [NoMethodError] if no entry matches (the lookup yields nil)
def primary_id(hash)
  entry = hash["docid"].find do |docid|
    next false unless docid["primary"]

    language = docid["language"]
    language.nil? || language == "en"
  end
  entry["id"]
end

#update_id(hash) ⇒ Object



77
78
79
80
81
82
83
84
# File 'lib/relaton_bipm/bipm_si_brochure_parser.rb', line 77

# Flag SI brochure identifiers as primary and rename them in place.
#
# Every docid entry of type "BIPM" whose id matches "BIPM Brochure"
# (case-insensitive) gets "primary" set to true, and "SI " is inserted in
# front of the "Brochure" that directly follows the leading "BIPM ".
# Other entries are left untouched.
#
# @param hash [Hash] document hash with a "docid" array of hashes
# @return [Array<Hash>] the "docid" array that was iterated
def update_id(hash)
  hash["docid"].each do |docid|
    if docid["type"] == "BIPM" && docid["id"].match?(/BIPM Brochure/i)
      docid["primary"] = true
      docid["id"].sub!(/(?<=^BIPM\s)(Brochure)/i, "SI \\1")
    end
  end
end