Class: RelatonBipm::DataOutcomesParser
- Inherits:
-
Object
- Object
- RelatonBipm::DataOutcomesParser
- Defined in:
- lib/relaton_bipm/data_outcomes_parser.rb
Constant Summary collapse
- SHORTTYPE =
{ "Resolution" => "RES", "Recommendation" => "REC", "Decision" => "DECN", "Statement" => "DECL", "Declaration" => "DECL", "Action" => "ACT", }.freeze
- TRANSLATIONS =
{ "Declaration" => "Déclaration", "Meeting" => "Réunion", "Recommendation" => "Recommandation", "Resolution" => "Résolution", "Decision" => "Décision", }.freeze
Class Method Summary collapse
-
.parse(data_fetcher) ⇒ Object
Parse documents from data-outcomes dataset and write them to YAML files.
Instance Method Summary collapse
-
#add_part(hash, part) ⇒ Object
Add part to ID and structured identifier.
-
#add_to_index(item, path) ⇒ Object
Add item to index.
-
#author_org(date, body) ⇒ Hash?
Create author organization.
-
#bipm_org ⇒ Hash
Create BIPM organization.
-
#cctf_org(date) ⇒ Hash
Create CCTF organization.
-
#cgpm_org ⇒ Hash
Create CGPM organization.
-
#cipm_org ⇒ Array<Hash>
Create CIPM organization.
-
#contributors(date, body) ⇒ Array<Hash>
Create contributors.
-
#create_id(body, type, num, date) ⇒ String
Create ID.
-
#create_links(**args) ⇒ Array<Hash>
Create links.
- #create_meeting_docids(en_id) ⇒ Object
-
#create_meeting_docnum(body, type, num, date) ⇒ String
Create meeting document number.
-
#create_resolution_docids(body, type, num, date) ⇒ Array<RelatonBib::DocumentIdentifier>
Create document IDs.
-
#create_resolution_docnum(body, type, num, date) ⇒ String
Create resolution document number.
-
#create_title(content, language, format = "text/plain") ⇒ Hash
Create a title.
- #create_titles(data) ⇒ Object
-
#fetch_body(dir) ⇒ Object
Search for English meetings in the body directory.
-
#fetch_meeting(en_file, body, type, dir) ⇒ Object
Create and write BIPM meeting/resolution.
-
#fetch_resolution(**args) ⇒ Object
Parse BIPM resolutions and write them to YAML files.
-
#fetch_type(dir, body) ⇒ Object
Search for meetings.
-
#initialize(data_fetcher) ⇒ DataOutcomesParser
constructor
Create data-outcomes parser.
-
#make_docid(**args) ⇒ RelatonBib::DocumentIdentifier
Create document ID.
-
#meeting_bibitem(**args) ⇒ Hash
Create hash from BIPM meeting.
- #meeting_links(en_file, fr_file) ⇒ Object
- #meeting_md(eng, frn) ⇒ Object
-
#organization(names, abbr) ⇒ Hash
Create organization.
-
#parse ⇒ Object
Parse BIPM meetings and write them to YAML files.
- #parse_file(path) ⇒ Object
-
#read_files(en_file) ⇒ Array<Hash, String, nil>
Read English and French files.
- #resolution_fr_long_id(body, type, num, year) ⇒ Object
-
#resolution_link(en_r, fr_r, src) ⇒ Array<Hash>
Parse resolution links.
- #resolution_long_ids(body, type, num, year) {|make_docid id: en, type: "BIPM-long", language: "en", script: "Latn"| ... } ⇒ Object
- #resolution_short_ids(body, type, num, year) {|make_docid(id: short, type: "BIPM", primary: true)| ... } ⇒ Object
-
#resolution_title(en_r, fr_r) ⇒ Array<Hash>
Parse resolution titles.
-
#special_id_case?(body, type, year) ⇒ Boolean
Check if ID is special case.
Constructor Details
#initialize(data_fetcher) ⇒ DataOutcomesParser
Create data-outcomes parser
25 26 27 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 25

#
# Create data-outcomes parser.
#
# @param data_fetcher [Object] data fetcher; it is kept behind a WeakRef, so
#   this parser does not hold a strong reference to it
#
def initialize(data_fetcher)
  weak_fetcher = WeakRef.new(data_fetcher)
  @data_fetcher = weak_fetcher
end
Class Method Details
.parse(data_fetcher) ⇒ Object
Parse documents from data-outcomes dataset and write them to YAML files
34 35 36 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 34

#
# Build a parser around the given data fetcher and run it.
#
# @param data_fetcher [Object] data fetcher handed to the constructor
#
# @return [Object] whatever the instance-level #parse returns
#
def self.parse(data_fetcher)
  parser = new(data_fetcher)
  parser.parse
end
Instance Method Details
#add_part(hash, part) ⇒ Object
Add part to ID and structured identifier
375 376 377 378 379 380 381 382 383 384 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 375 def add_part(hash, part) regex = /(\p{L}+\s(?:\w+\/)?\d+)(?![\d-])/ hash[:id] += "-#{part}" hash[:docnumber].sub!(regex) { |m| "#{m}-#{part}" } hash[:docid].select { |id| id.type == "BIPM" }.each do |did| did.instance_variable_get(:@id).sub!(regex) { "#{$1}-#{part}" } # did.instance_variable_set(:@id, id) end hash[:structuredidentifier].instance_variable_set :@part, part end |
#add_to_index(item, path) ⇒ Object
Add item to index
242 243 244 245 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 242

#
# Register an item in the data fetcher's index.
#
# The index key is the hash form of the item's parsed document number.
#
# @param item [Object] item responding to #docnumber
# @param path [String] path of the file the item was written to
#
def add_to_index(item, path) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  index_key = Id.new.parse(item.docnumber).to_hash
  index = @data_fetcher.index2
  index.add_or_update(index_key, path)
end
#author_org(date, body) ⇒ Hash?
Create author organization
270 271 272 273 274 275 276 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 270

#
# Create author organization.
#
# @param date [String] date; only forwarded to #cctf_org when body is "CCTF"
# @param body [String] body abbreviation
#
# @return [Hash, nil] organization for "CCTF", "CGPM" or "CIPM";
#   nil for any other body
#
def author_org(date, body)
  case body
  when "CCTF" then cctf_org date
  when "CGPM" then cgpm_org
  when "CIPM" then cipm_org
  end
end
#bipm_org ⇒ Hash
Create BIPM organization
283 284 285 286 287 288 289 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 283

#
# Create BIPM organization.
#
# @return [Hash] result of #organization for the English and French names
#   with abbreviation "BIPM", plus a :url entry
#
def bipm_org
  names = {
    "en" => "International Bureau of Weights and Measures",
    "fr" => "Bureau international des poids et mesures",
  }.map { |lang, name| { content: name, language: lang } }
  org = organization(names, "BIPM")
  org[:url] = "www.bipm.org"
  org
end
#cctf_org(date) ⇒ Hash
Create CCTF organization
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 298

#
# Create CCTF organization.
#
# For dates whose year is before 1999 the names and abbreviation of the
# "CCDS" committee are used instead of "CCTF".
#
# @param date [String] date parseable by Date.parse
#
# @return [Hash] result of #organization
#
def cctf_org(date) # rubocop:disable Metrics/MethodLength
  abbr, en_name, fr_name =
    if Date.parse(date).year < 1999
      ["CCDS",
       "Consultative Committee for the Definition of the Second",
       "Comité Consultatif pour la Définition de la Seconde"]
    else
      ["CCTF",
       "Consultative Committee for Time and Frequency",
       "Comité consultatif du temps et des fréquences"]
    end
  nms = [
    { content: en_name, language: "en" },
    { content: fr_name, language: "fr" },
  ]
  organization nms, abbr
end
#cgpm_org ⇒ Hash
Create CGPM organization
332 333 334 335 336 337 338 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 332

#
# Create CGPM organization.
#
# @return [Hash] result of #organization for the English and French names
#   with abbreviation "CGPM"
#
def cgpm_org
  localized = [
    ["General Conference on Weights and Measures", "en"],
    ["Conférence Générale des Poids et Mesures", "fr"],
  ]
  nms = localized.map { |name, lang| { content: name, language: lang } }
  organization(nms, "CGPM")
end
#cipm_org ⇒ Array<Hash>
Create CIPM organization
345 346 347 348 349 350 351 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 345

#
# Create CIPM organization.
#
# @return [Hash] result of #organization for the English and French names
#   with abbreviation "CIPM"
#
def cipm_org
  localized = [
    ["International Committee for Weights and Measures", "en"],
    ["Comité international des poids et mesures", "fr"],
  ]
  names = localized.map { |name, lang| { content: name, language: lang } }
  organization(names, "CIPM")
end
#contributors(date, body) ⇒ Array<Hash>
Create contributors
255 256 257 258 259 260 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 255

#
# Create contributors.
#
# BIPM is always listed as publisher; the body's own organization is added
# as author when #author_org returns one.
#
# @param date [String] date, forwarded to #author_org
# @param body [String] body abbreviation, forwarded to #author_org
#
# @return [Array<Hash>] contributors
#
def contributors(date, body)
  contribs = [{ entity: bipm_org, role: [{ type: "publisher" }] }]
  author = author_org date, body
  contribs << { entity: author, role: [{ type: "author" }] } if author
  contribs
end
#create_id(body, type, num, date) ⇒ String
Create ID
484 485 486 487 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 484

#
# Create ID.
#
# Joins body, short type, year and number with "-"; nil components are left out.
#
# @param body [String] body abbreviation
# @param type [String] type; capitalized before the SHORTTYPE lookup
# @param num [String, nil] number
# @param date [String] date parseable by Date.parse; only the year is used
#
# @return [String] ID
#
def create_id(body, type, num, date)
  year = Date.parse(date).year
  short_type = SHORTTYPE[type.capitalize]
  components = [body, short_type, year, num]
  components.reject(&:nil?).join("-")
end
#create_links(**args) ⇒ Array<Hash>
Create links
430 431 432 433 434 435 436 437 438 439 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 430

#
# Create links.
#
# Produces, in this order: one "citation" link per language (:en, :fr) whose
# metadata has a "url", one "pdf" link per entry of :pdf, then the :src links.
#
# @param args [Hash] keyword arguments
# @option args [Hash, nil] :en English metadata
# @option args [Hash, nil] :fr French metadata
# @option args [Object] :pdf value passed through RelatonBib.array
# @option args [Array<Hash>, nil] :src source links appended as they are
#
# @return [Array<Hash>] links
#
def create_links(**args)
  links = []
  args.slice(:en, :fr).each do |lang, md|
    next unless md && md["url"]

    links << { type: "citation", content: md["url"], language: lang.to_s, script: "Latn" }
  end
  RelatonBib.array(args[:pdf]).each do |pdf|
    links << { type: "pdf", content: pdf }
  end
  links += args[:src] if args[:src]
  links
end
#create_meeting_docids(en_id) ⇒ Object
557 558 559 560 561 562 563 564 565 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 557

#
# Create document IDs for a meeting.
#
# The French ID is derived from the English one: the English ordinal suffix
# becomes "e" (then wrapped in <sup>) and "Meeting" becomes "réunion".
#
# @param en_id [String] English meeting ID
#
# @return [Array] English, French and combined "en / fr" identifiers, each
#   built with #make_docid
#
def create_meeting_docids(en_id)
  fr_plain = en_id.sub(/(\d+)(?:st|nd|rd|th)/, '\1e').sub("Meeting", "réunion")
  fr_id_sup = fr_plain.sub(/(\d+)(e)/, '\1<sup>\2</sup>')
  common = { type: "BIPM", primary: true }
  [
    make_docid(id: en_id, **common, language: "en", script: "Latn"),
    make_docid(id: fr_id_sup, **common, language: "fr", script: "Latn"),
    make_docid(id: "#{en_id} / #{fr_id_sup}", **common),
  ]
end
#create_meeting_docnum(body, type, num, date) ⇒ String
Create meeting document number
468 469 470 471 472 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 468

#
# Create meeting document number.
#
# @param body [String] body abbreviation
# @param type [String] type
# @param num [String] meeting number
# @param date [String] date parseable by Date.parse; only the year is used
#
# @return [String] document number, e.g. "CGPM 11th Meeting (1960)"
#
def create_meeting_docnum(body, type, num, date)
  year = Date.parse(date).year
  n = num.to_i
  # 11, 12 and 13 (and 111, 112, ...) take "th" although they end in 1, 2, 3
  ord = (11..13).cover?(n % 100) ? "th" : %w[th st nd rd th th th th th th][n % 10]
  "#{body} #{num}#{ord} #{type} (#{year})"
end
#create_resolution_docids(body, type, num, date) ⇒ Array<RelatonBib::DocumentIdentifier>
Create documetn IDs
510 511 512 513 514 515 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 510

#
# Create document IDs for a resolution.
#
# Collects every identifier yielded by #resolution_short_ids followed by
# every identifier yielded by #resolution_long_ids.
#
# @param body [String] body abbreviation
# @param type [String] type
# @param num [String] number
# @param date [String] date parseable by Date.parse; only the year is used
#
# @return [Array<RelatonBib::DocumentIdentifier>] document IDs
#
def create_resolution_docids(body, type, num, date)
  year = Date.parse(date).year
  ids = []
  resolution_short_ids(body, type, num, year) { |id| ids << id }
  resolution_long_ids(body, type, num, year) { |id| ids << id }
  # return the collection explicitly instead of relying on the value the
  # last helper happens to return
  ids
end
#create_resolution_docnum(body, type, num, date) ⇒ String
Creata resolution document number
451 452 453 454 455 456 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 451

#
# Create resolution document number.
#
# The number is included only when its integer value is positive.
#
# @param body [String] body abbreviation
# @param type [String] type; capitalized before the SHORTTYPE lookup
# @param num [String] number
# @param date [String] date parseable by Date.parse; only the year is used
#
# @return [String] document number, e.g. "CGPM RES 1 (2018)"
#
def create_resolution_docnum(body, type, num, date)
  year = Date.parse(date).year
  parts = [body, SHORTTYPE[type.capitalize]]
  parts << num if num.to_i.positive?
  "#{parts.join(' ')} (#{year})"
end
#create_title(content, language, format = "text/plain") ⇒ Hash
Create a title
361 362 363 364 365 366 367 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 361

#
# Create a title.
#
# For French titles the first "<digits>e" ordinal is rewritten as
# "<digits><sup>e</sup>", and the format becomes "text/html" whenever the
# content contains "<sup>". The +content+ argument itself is never modified.
#
# @param content [String] title text
# @param language [String] language code
# @param format [String] format, "text/plain" by default
#
# @return [Hash] title with :content, :language, :script and :format
#
def create_title(content, language, format = "text/plain")
  if language == "fr"
    # non-destructive sub: the caller's string (which may be frozen, or reused
    # for a second bibitem) must stay untouched
    content = content.sub(/(\d+)(e)/, '\1<sup>\2</sup>')
    format = "text/html" if content.match?(/<sup>/)
  end
  { content: content, language: language, script: "Latn", format: format }
end
#create_titles(data) ⇒ Object
417 418 419 420 421 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 417

#
# Create titles from per-language metadata.
#
# Languages whose metadata is missing or has no "title" are skipped.
#
# @param data [Hash] language => metadata (or nil)
#
# @return [Array<Hash>] one #create_title result per language with a title
#
def create_titles(data)
  titled = data.select { |_lang, md| md && md["title"] }
  titled.map { |lang, md| create_title(md["title"], lang.to_s) }
end
#fetch_body(dir) ⇒ Object
Search for English meetings in the body directory
52 53 54 55 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 52

#
# Search for English meetings in the body directory.
#
# The body name is the upper-cased last path segment of +dir+; every entry of
# +dir+ whose name ends in "-en" is handed to #fetch_type.
#
# @param dir [String] path to the body directory
#
def fetch_body(dir)
  body = dir.split("/").last.upcase
  english_dirs = Dir.glob(File.join(dir, "*-en"))
  english_dirs.each do |type_dir|
    fetch_type(type_dir, body)
  end
end
#fetch_meeting(en_file, body, type, dir) ⇒ Object
Create and write BIPM meeting/resolution
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 80 def fetch_meeting(en_file, body, type, dir) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength _, en, fr_file, fr = read_files en_file en_md, fr_md, num, part = meeting_md en, fr src = meeting_links en_file, fr_file file = "#{num}.#{@data_fetcher.ext}" path = File.join dir, file hash = meeting_bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"] if @data_fetcher.files.include?(path) && part add_part hash, part item = RelatonBipm::BipmBibliographicItem.new(**hash) has_part_item = parse_file path has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item) @data_fetcher.write_file path, has_part_item, warn_duplicate: false path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}" elsif part hash[:title].each { |t| t[:content] = t[:content].sub(/\s\(.+\)$/, "") } h = meeting_bibitem body: body, type: type, en: en_md, fr: fr_md, num: num, src: src, pdf: en["pdf"] add_part h, part part_item = RelatonBipm::BipmBibliographicItem.new(**h) part_item_path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}" @data_fetcher.write_file part_item_path, part_item add_to_index part_item, part_item_path hash[:relation] = [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: part_item)] item = RelatonBipm::BipmBibliographicItem.new(**hash) else item = RelatonBipm::BipmBibliographicItem.new(**hash) end @data_fetcher.write_file path, item add_to_index item, path fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num end |
#fetch_resolution(**args) ⇒ Object
Parse BIPM resolutions and write them to YAML files
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 168 def fetch_resolution(**args) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity args[:en]["resolutions"].each.with_index do |r, i| # rubocop:disable Metrics/BlockLength hash = { type: "proceedings", title: [], doctype: DocumentType.new(type: r["type"]), place: [RelatonBib::Place.new(city: "Paris")] } fr_r = args.dig(:fr, "resolutions", i) # @TODO: create a GH issue when fr is missing hash[:title] = resolution_title r, fr_r hash[:link] = resolution_link r, fr_r, args[:src] date = r["dates"].first.to_s hash[:date] = [{ type: "published", on: date }] num = r["identifier"].to_s # .split("-").last year = date.split("-").first num = "0" if num == year num_justed = num.rjust 2, "0" type = r["type"].capitalize docnum = create_resolution_docnum args[:body], type, num, date hash[:id] = create_id(args[:body], type, num_justed, date) hash[:docid] = create_resolution_docids args[:body], type, num, date hash[:docnumber] = docnum hash[:language] = %w[en fr] hash[:script] = ["Latn"] hash[:contributor] = contributors date, args[:body] hash[:structuredidentifier] = RelatonBipm::StructuredIdentifier.new docnumber: num item = RelatonBipm::BipmBibliographicItem.new(**hash) file = "#{year}-#{num_justed}.#{@data_fetcher.ext}" out_dir = File.join args[:dir], r["type"].downcase FileUtils.mkdir_p out_dir path = File.join out_dir, file @data_fetcher.write_file path, item add_to_index item, path end end |
#fetch_type(dir, body) ⇒ Object
Search for meetings
63 64 65 66 67 68 69 70 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 63

#
# Search for meetings.
#
# Derives the type from the last path segment of +dir+ (text before the first
# "-", trailing "s" removed, capitalized), creates
# "<output>/<body>/<type>" (both lower-cased) and passes every .yml/.yaml file
# in +dir+ to #fetch_meeting.
#
# @param dir [String] path to the type directory
# @param body [String] body abbreviation
#
def fetch_type(dir, body) # rubocop:disable Metrics/AbcSize
  dir_name = dir.split("/").last
  type = dir_name.split("-").first.sub(/s$/, "").capitalize
  body_dir = File.join(@data_fetcher.output, body.downcase)
  FileUtils.mkdir_p(body_dir)
  outdir = File.join(body_dir, type.downcase)
  FileUtils.mkdir_p(outdir)
  Dir.glob(File.join(dir, "*.{yml,yaml}")).each do |en_file|
    fetch_meeting(en_file, body, type, outdir)
  end
end
#make_docid(**args) ⇒ RelatonBib::DocumentIdentifier
Create document ID
578 579 580 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 578

#
# Create document ID.
#
# @param args [Hash] keyword arguments passed unchanged to
#   RelatonBib::DocumentIdentifier.new
#
# @return [RelatonBib::DocumentIdentifier] document ID
#
def make_docid(**args)
  identifier_class = RelatonBib::DocumentIdentifier
  identifier_class.new(**args)
end
#meeting_bibitem(**args) ⇒ Hash
Create hash from BIPM meeting
400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 400

#
# Create hash from BIPM meeting.
#
# @param args [Hash] keyword arguments
# @option args [String] :body body abbreviation
# @option args [String] :type meeting type
# @option args [String] :num meeting number
# @option args [Hash] :en English metadata; its "date" is used throughout
# @option args [Hash, nil] :fr French metadata
# @option args [Object] :pdf PDF link(s), consumed by #create_links
# @option args [Array<Hash>] :src source links, consumed by #create_links
#
# @return [Hash] attributes for a bibliographic item
#
def meeting_bibitem(**args) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
  body, type, num = args.values_at(:body, :type, :num)
  date = args[:en]["date"]
  # the very same String object goes into :docnumber and into the docids
  docnum = create_meeting_docnum(body, type, num, date)
  {
    title: create_titles(args.slice(:en, :fr)),
    type: "proceedings",
    doctype: DocumentType.new(type: type),
    place: [RelatonBib::Place.new(city: "Paris")],
    date: [{ type: "published", on: date }],
    docid: create_meeting_docids(docnum),
    docnumber: docnum, # .sub(" --", "").sub(/\s\(\d{4}\)/, "")
    id: create_id(body, type, num, date),
    link: create_links(**args),
    language: %w[en fr],
    script: ["Latn"],
    contributor: contributors(date, body),
    structuredidentifier: RelatonBipm::StructuredIdentifier.new(docnumber: num),
  }
end
#meeting_links(en_file, fr_file) ⇒ Object
148 149 150 151 152 153 154 155 156 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 148

#
# Create "src" links pointing at the raw source files on GitHub.
#
# The URL is the fixed repository prefix, "main", and the last three path
# segments of the file. Languages without a file are skipped.
#
# @param en_file [String, nil] path to the English file
# @param fr_file [String, nil] path to the French file
#
# @return [Array<Hash>] source links
#
def meeting_links(en_file, fr_file)
  gh_src = "https://raw.githubusercontent.com/metanorma/bipm-data-outcomes/"
  links = []
  [["en", en_file], ["fr", fr_file]].each do |lang, file|
    next unless file

    segments = file.split("/")[-3..]
    segments.unshift("main")
    links << { type: "src", content: gh_src + segments.join("/"), language: lang, script: "Latn" }
  end
  links
end
#meeting_md(eng, frn) ⇒ Object
142 143 144 145 146 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 142

#
# Extract meeting metadata.
#
# The English "identifier" is split on "-" into number and part.
#
# @param eng [Hash] English document
# @param frn [Hash, nil] French document
#
# @return [Array] English metadata, French metadata (nil without a French
#   document), number, part (nil when the identifier has no "-")
#
def meeting_md(eng, frn)
  en_md = eng["metadata"]
  identifier = en_md["identifier"].to_s
  num, part = identifier.split("-")
  fr_md = frn.nil? ? nil : frn.dig("metadata")
  [en_md, fr_md, num, part]
end
#organization(names, abbr) ⇒ Hash
Create organization
322 323 324 325 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 322

#
# Create organization.
#
# Every name hash passed in gets script "Latn" added in place.
#
# @param names [Array<Hash>] organization names
# @param abbr [String] abbreviation
#
# @return [Hash] organization with :name and :abbreviation
#
def organization(names, abbr)
  names.each do |name|
    name[:script] = "Latn"
  end
  abbreviation = { content: abbr, language: %w[en fr], script: "Latn" }
  { name: names, abbreviation: abbreviation }
end
#parse ⇒ Object
Parse BIPM meeting and write them to YAML files
41 42 43 44 45 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 41

#
# Parse BIPM meetings and write them to YAML files.
#
# Hands each known body directory that exists under "bipm-data-outcomes"
# (relative to the current working directory) to #fetch_body.
#
def parse
  bodies = %w[cctf cgpm cipm ccauv ccem ccl ccm ccpr ccqm ccri cct ccu jcgm jcrb]
  source_path = File.join("bipm-data-outcomes", "{#{bodies.join(',')}}")
  Dir.glob(source_path).each do |body_dir|
    fetch_body(body_dir)
  end
end
#parse_file(path) ⇒ Object
113 114 115 116 117 118 119 120 121 122 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 113

#
# Read a previously written item back from disk.
#
# The parser is chosen by the data fetcher's format ("yaml" or "xml").
#
# @param path [String] path to the file
#
# @return [Object, nil] parsed item; nil when the format is neither "yaml"
#   nor "xml"
#
def parse_file(path)
  case @data_fetcher.format
  when "yaml"
    content = File.read(path, encoding: "UTF-8")
    RelatonBipm::BipmBibliographicItem.from_hash(RelatonBib.parse_yaml(content, [Date]))
  when "xml"
    RelatonBipm::XMLParser.from_xml(File.read(path, encoding: "UTF-8"))
  end
end
#read_files(en_file) ⇒ Array<Hash, String, nil>
Read English and French files
131 132 133 134 135 136 137 138 139 140 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 131

#
# Read English and French files.
#
# The French path is the English path with the "-en" suffix of the file's
# parent directory replaced by "-fr". A file that does not exist contributes
# nil for both its path and its data.
#
# @param en_file [String] path to the English file
#
# @return [Array<Hash, String, nil>] English path, English data, French path,
#   French data
#
def read_files(en_file)
  # Swap only the "-en" that closes the parent directory name, so names such
  # as "recommendations-en" (which contain an earlier "en") stay intact.
  fr_file = en_file.sub(%r{-en(?=/[^/]*\z)}, "-fr")
  # unexpected layout: keep the historical first-occurrence replacement
  fr_file = en_file.sub("en", "fr") if fr_file == en_file
  [en_file, fr_file].map do |file|
    if File.exist? file
      data = RelatonBib.parse_yaml(File.read(file, encoding: "UTF-8"), [Date])
      path = file
    end
    [path, data]
  end.flatten
end
#resolution_fr_long_id(body, type, num, year) ⇒ Object
544 545 546 547 548 549 550 551 552 553 554 555 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 544

#
# Create the French long ID of a resolution.
#
# Special cases (see #special_id_case?) read "<type> <body>[/<num>] (<year>)";
# all others read "<type> [<num> ]de la|du <body> (<year>)". The number is
# included only when its integer value is positive; the type is translated
# through TRANSLATIONS when an entry exists.
#
# @param body [String] body abbreviation
# @param type [String] type
# @param num [String] number
# @param year [Integer, String] year
#
# @return [String] French long ID
#
def resolution_fr_long_id(body, type, num, year)
  label = TRANSLATIONS[type] || type
  id =
    if special_id_case? body, type, year
      suffix = num.to_i.positive? ? "/#{num}" : ""
      label + " #{body}" + suffix
    else
      number = num.to_i.positive? ? " #{num}" : ""
      article = body == "CGPM" ? " de la" : " du"
      label + number + article + " #{body}"
    end
  "#{id} (#{year})"
end
#resolution_link(en_r, fr_r, src) ⇒ Array<Hash>
Parse resolution links
226 227 228 229 230 231 232 233 234 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 226

#
# Parse resolution links.
#
# Order of the result: English citation, French citation (when a French
# resolution is given), the source links, then a "pdf" link when the English
# resolution has a "reference".
#
# @param en_r [Hash] English resolution
# @param fr_r [Hash, nil] French resolution
# @param src [Array<Hash>] source links
#
# @return [Array<Hash>] links
#
def resolution_link(en_r, fr_r, src)
  citation = lambda do |resolution, lang|
    { type: "citation", content: resolution["url"], language: lang, script: "Latn" }
  end
  links = [citation.call(en_r, "en")]
  links << citation.call(fr_r, "fr") if fr_r
  links += src
  reference = en_r["reference"]
  links << { type: "pdf", content: reference } if reference
  links
end
#resolution_long_ids(body, type, num, year) {|make_docid id: en, type: "BIPM-long", language: "en", script: "Latn"| ... } ⇒ Object
532 533 534 535 536 537 538 539 540 541 542 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 532

#
# Yield the long-form ("BIPM-long") identifiers of a resolution: English,
# French, then the combined "en / fr" form.
#
# The number is part of the English ID only when its integer value is positive.
#
# @param body [String] body abbreviation
# @param type [String] type
# @param num [String] number
# @param year [Integer, String] year
#
# @yield [id] each identifier built with #make_docid
#
# @return [Object] value returned by the block for the last identifier
#
def resolution_long_ids(body, type, num, year, &_block)
  number = num.to_i.positive? ? " #{num}" : ""
  en = "#{body} #{type}#{number} (#{year})"
  yield make_docid(id: en, type: "BIPM-long", language: "en", script: "Latn")

  fr = resolution_fr_long_id(body, type, num, year)
  yield make_docid(id: fr, type: "BIPM-long", language: "fr", script: "Latn")

  yield make_docid(id: "#{en} / #{fr}", type: "BIPM-long")
end
#resolution_short_ids(body, type, num, year) {|make_docid(id: short, type: "BIPM", primary: true)| ... } ⇒ Object
517 518 519 520 521 522 523 524 525 526 527 528 529 530 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 517

#
# Yield the short-form ("BIPM", primary) identifiers of a resolution:
# language-neutral, English ("E") and French ("F").
#
# The number is included only when its integer value is positive.
#
# @param body [String] body abbreviation
# @param type [String] type, looked up in SHORTTYPE as given
# @param num [String] number
# @param year [Integer, String] year
#
# @yield [id] each identifier built with #make_docid
#
# @return [Object] value returned by the block for the last identifier
#
def resolution_short_ids(body, type, num, year, &_block)
  base = "#{body} #{SHORTTYPE[type]}"
  base = "#{base} #{num}" if num.to_i.positive?
  common = { type: "BIPM", primary: true }

  yield make_docid(id: "#{base} (#{year})", **common)

  yield make_docid(id: "#{base} (#{year}, E)", **common, language: "en", script: "Latn")

  yield make_docid(id: "#{base} (#{year}, F)", **common, language: "fr", script: "Latn")
end
#resolution_title(en_r, fr_r) ⇒ Array<Hash>
Parse resolution titles
210 211 212 213 214 215 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 210

#
# Parse resolution titles.
#
# A title is produced for a language only when the resolution has a
# non-empty "title".
#
# @param en_r [Hash] English resolution
# @param fr_r [Hash, nil] French resolution
#
# @return [Array<Hash>] titles built with #create_title, English first
#
def resolution_title(en_r, fr_r)
  titles = []
  en_title = en_r["title"]
  titles << create_title(en_title, "en") if en_title && !en_title.empty?
  fr_title = fr_r && fr_r["title"]
  titles << create_title(fr_title, "fr") if fr_title && !fr_title.empty?
  titles
end
#special_id_case?(body, type, year) ⇒ Boolean
Check if ID is special case
498 499 500 501 |
# File 'lib/relaton_bipm/data_outcomes_parser.rb', line 498

#
# Check if ID is special case.
#
# True for CIPM decisions after 2011 and for JCRB recommendations,
# resolutions and decisions.
#
# @param body [String] body abbreviation
# @param type [String] type, e.g. "Decision"
# @param year [Integer, String] year
#
# @return [Boolean] true when the ID is a special case
#
def special_id_case?(body, type, year)
  (body == "CIPM" && type == "Decision" && year.to_i > 2011) ||
    # type names spelled as in SHORTTYPE/TRANSLATIONS so they can actually match
    (body == "JCRB" && %w[Recommendation Resolution Decision].include?(type))
end