Class: Iev::TermBuilder

Inherits:

Object

Object
Iev::TermBuilder

show all

Includes: Cli::Ui, Utilities

Defined in: lib/iev/term_builder.rb

Constant Summary

Constants included from Utilities

Utilities::FIGURE_ONE_REGEX, Utilities::FIGURE_TWO_REGEX, Utilities::IMAGE_PATH_PREFIX, Utilities::SIMG_PATH_REGEX

Instance Attribute Summary collapse

#data ⇒ Object readonly

Returns the value of attribute data.

Class Method Summary collapse

.build_from(data) ⇒ Object

Instance Method Summary collapse

#build ⇒ Object
#build_term_object ⇒ Object
#extract_authoritative_source ⇒ Object
#extract_classification ⇒ Object
#extract_definition_value ⇒ Object
#extract_entry_status ⇒ Object
#extract_examples ⇒ Object
#extract_international_symbol_designation ⇒ Object
#extract_notes ⇒ Object
#extract_primary_designation ⇒ Object
#extract_superseded_concepts ⇒ Object
#extract_synonymous_designations ⇒ Object
#extract_terms ⇒ Object
#find_value_for(key) ⇒ Object
#flesh_date(incomplete_date) ⇒ Object
#initialize(data) ⇒ TermBuilder constructor

A new instance of TermBuilder.
#split_definition ⇒ Object

Splits unified definition (from the spreadsheet) into separate definition, examples, and notes strings (for YAMLs).
#term_domain ⇒ Object
#term_hash ⇒ Object
#term_id ⇒ Object
#term_language ⇒ Object

Methods included from Utilities

#parse_anchor_tag, #replace_newlines

Methods included from Cli::Ui

debug, info, progress, set_ui_tag, warn

Constructor Details

#initialize(data) ⇒ `TermBuilder`

Returns a new instance of TermBuilder.



12
13
14

# File 'lib/iev/term_builder.rb', line 12

# Creates a builder around one source row.
#
# @param data [Object] stored as-is in @data; #find_value_for later calls
#   +fetch+ on it with a Symbol key, so it is presumably a Hash keyed by
#   column-name symbols (confirm against callers)
def initialize(data)
  @data = data
end

Instance Attribute Details

#data ⇒ `Object` (readonly)

Returns the value of attribute data.



24
25
26

# File 'lib/iev/term_builder.rb', line 24

# Read-only accessor for the row handed to the constructor.
#
# @return [Object] the value stored in @data
def data
  @data
end

Class Method Details

.build_from(data) ⇒ `Object`



20
21
22

# File 'lib/iev/term_builder.rb', line 20

# Convenience entry point: instantiates a builder for +data+ and
# immediately runs #build on it.
#
# @param data [Object] row passed straight to the constructor
# @return [Object] whatever #build returns
def self.build_from(data)
  builder = new(data)
  builder.build
end

Instance Method Details

#build ⇒ `Object`



16
17
18

# File 'lib/iev/term_builder.rb', line 16

# Builds the term for this row; a thin wrapper around #build_term_object.
#
# @return [Object] the result of #build_term_object
def build
  build_term_object
end

#build_term_object ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 41

# Builds the localized concept for the current row.
#
# Tags the UI output with "<term id> (<language>)", reports progress,
# splits the unified definition into @definition/@examples/@notes, and
# finally feeds #term_hash to Glossarist::LocalizedConcept.from_hash.
#
# @return [Object] the value returned by
#   Glossarist::LocalizedConcept.from_hash
def build_term_object
  id = term_id
  language = term_language

  set_ui_tag "#{id} (#{language})"
  progress "Processing term #{id} (#{language})..."

  # Must run before term_hash: the extract_* helpers read the instance
  # variables that split_definition sets.
  split_definition

  concept_attributes = term_hash
  Glossarist::LocalizedConcept.from_hash(concept_attributes)
end

#extract_authoritative_source ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 260

# Parses the "SOURCE" column into a list of authoritative sources.
#
# The raw value and the term domain are handed to SourceParser; nil
# entries in its +parsed_sources+ are dropped and every remaining entry
# is merged with "type" => "authoritative".
#
# @return [Array, nil] the tagged sources, or nil when SOURCE is nil
def extract_authoritative_source
  raw_source = find_value_for("SOURCE")
  return nil if raw_source.nil?

  parser = SourceParser.new(raw_source, term_domain)
  usable_sources = parser.parsed_sources.compact

  usable_sources.map { |entry| entry.merge("type" => "authoritative") }
end

#extract_classification ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 245

# Normalizes the "SYNONYM1STATUS" column into a classification.
#
# An empty string and several localized spellings map to "admitted";
# another set of localized spellings maps to "preferred". Any other
# value (including nil) is returned unchanged.
#
# @return [String, nil] "admitted", "preferred", or the raw value
def extract_classification
  raw_status = find_value_for("SYNONYM1STATUS")

  admitted_spellings = ["", "认可的", "допустимый", "admitido"]
  preferred_spellings = [
    "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
  ]

  return "admitted" if admitted_spellings.include?(raw_status)
  return "preferred" if preferred_spellings.include?(raw_status)

  raw_status
end

#extract_definition_value ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 211

# Converts the definition text held in @definition for output.
#
# The text goes through parse_anchor_tag (with the term domain), then
# replace_newlines, then Iev::Converter.mathml_to_asciimath, and the
# result is stripped of surrounding whitespace.
#
# @return [String, nil] the converted definition, or nil when
#   @definition is not set
def extract_definition_value
  raw_definition = @definition
  return unless raw_definition

  with_links = parse_anchor_tag(raw_definition, term_domain)
  single_line = replace_newlines(with_links)
  converted = Iev::Converter.mathml_to_asciimath(single_line)

  converted.strip
end

#extract_entry_status ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 239

# Maps the row's "STATUS" column to an entry status.
#
# The comparison is case-insensitive. A missing STATUS value (nil) is
# handled like any other unrecognised status and yields nil, rather than
# raising NoMethodError from calling +downcase+ on nil.
#
# @return [String, nil] "valid" when STATUS is "standard" (in any
#   letter case), otherwise nil
def extract_entry_status
  case find_value_for("STATUS")&.downcase
  when "standard" then "valid"
  end
end

#extract_examples ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 219

# Converts each string in @examples into a +{ content: ... }+ hash.
#
# Every example goes through parse_anchor_tag (with the term domain),
# replace_newlines and Iev::Converter.mathml_to_asciimath, and is then
# stripped. Expects @examples to have been set already (#split_definition
# does this).
#
# @return [Array<Hash>] one hash per example, keyed by :content
def extract_examples
  @examples.map do |raw_example|
    with_links = parse_anchor_tag(raw_example, term_domain)
    single_line = replace_newlines(with_links)
    converted = Iev::Converter.mathml_to_asciimath(single_line)

    { content: converted.strip }
  end
end

#extract_international_symbol_designation ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 206

# Builds a symbol designation from the "SYMBOLE" column.
#
# @return [Object, nil] the result of build_symbol_designation, or the
#   falsy column value itself (normally nil) when no symbol is present
def extract_international_symbol_designation
  symbol = find_value_for("SYMBOLE")
  return symbol unless symbol

  build_symbol_designation(symbol)
end

#extract_notes ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 229

# Converts each string in @notes into a +{ content: ... }+ hash.
#
# Every note goes through parse_anchor_tag (with the term domain),
# replace_newlines and Iev::Converter.mathml_to_asciimath, and is then
# stripped. Expects @notes to have been set already (#split_definition
# does this).
#
# @return [Array<Hash>] one hash per note, keyed by :content
def extract_notes
  @notes.map do |raw_note|
    with_links = parse_anchor_tag(raw_note, term_domain)
    single_line = replace_newlines(with_links)
    converted = Iev::Converter.mathml_to_asciimath(single_line)

    { content: converted.strip }
  end
end

#extract_primary_designation ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 178

# Builds the preferred expression designation from the "TERM" column.
#
# A term consisting of exactly five dots (".....") is replaced by "NA"
# before being passed on. The "TERMATTRIBUTE" column is supplied as
# +attribute_data+ and the status is always "preferred".
#
# @return [Object] the result of build_expression_designation
def extract_primary_designation
  term_text = find_value_for("TERM")
  term_text = "NA" if term_text == "....."
  term_attributes = find_value_for("TERMATTRIBUTE")

  build_expression_designation(
    term_text,
    attribute_data: term_attributes,
    status: "preferred",
  )
end

#extract_superseded_concepts ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 272

# Parses the "REPLACES" column into supersession records.
#
# @return [Object, nil] SupersessionParser#supersessions for the column
#   value, or nil when REPLACES is nil
def extract_superseded_concepts
  replaced = find_value_for("REPLACES")
  return nil if replaced.nil?

  parser = SupersessionParser.new(replaced)
  parser.supersessions
end

#extract_synonymous_designations ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 189

# Builds expression designations for the SYNONYM1..SYNONYM3 columns.
#
# A synonym cell may hold several entries separated by tags matching
# /<[pbr]+>/ (for example <p> or <br>); each piece becomes its own
# designation. The matching SYNONYMnATTRIBUTE column is passed as
# +attribute_data+ and the lower-cased SYNONYMnSTATUS column as +status+.
#
# @return [Array] all designations, flattened, with nil results removed
def extract_synonymous_designations
  collected = []

  1.upto(3) do |index|
    column = "SYNONYM#{index}"
    cell = find_value_for(column) || ""

    # Some synonyms have more than one entry
    cell.split(/<[pbr]+>/).each do |piece|
      collected << build_expression_designation(
        piece,
        attribute_data: find_value_for("#{column}ATTRIBUTE"),
        status: find_value_for("#{column}STATUS")&.downcase,
      )
    end
  end

  collected.flatten.compact
end

#extract_terms ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 170

# Collects every designation of the term in a fixed order: the primary
# designation, then the synonyms, then the international symbol.
#
# @return [Array] the designations with nil entries removed
def extract_terms
  primary = extract_primary_designation
  synonyms = extract_synonymous_designations
  symbol = extract_international_symbol_designation

  [primary, *synonyms, symbol].compact
end

#find_value_for(key) ⇒ `Object`



26
27
28

# File 'lib/iev/term_builder.rb', line 26

# Looks up a column of the current row.
#
# The key is converted to a Symbol before the lookup. A present value
# has +sanitize+ called on it (that method is defined outside this view).
#
# @param key [String, Symbol] column name
# @return [Object, nil] the sanitized value, or nil when the column is
#   missing or holds nil
def find_value_for(key)
  cell = data.fetch(key.to_sym, nil)
  return if cell.nil?

  cell.sanitize
end

#flesh_date(incomplete_date) ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 30

# Completes a partial "YYYY[-MM[-DD]]" date and formats it as a DateTime
# string.
#
# A missing month or day defaults to "01". nil and empty input are
# returned unchanged.
#
# @param incomplete_date [String, nil] dash-separated date, possibly
#   lacking month and/or day
# @return [String, nil] DateTime#to_s of the completed date, or the
#   input itself when it is nil or empty
def flesh_date(incomplete_date)
  nothing_to_parse = incomplete_date.nil? || incomplete_date.empty?
  return incomplete_date if nothing_to_parse

  parts = incomplete_date.split("-")
  year = parts[0]
  month = parts[1] || "01"
  day = parts[2] || "01"

  DateTime.parse([year, month, day].join("-")).to_s
end

#split_definition ⇒ `Object`

Splits unified definition (from the spreadsheet) into separate definition, examples, and notes strings (for YAMLs).

Sets @definition, @examples and @notes variables.

# File 'lib/iev/term_builder.rb', line 103

# Splits the unified "DEFINITION" column into a definition, examples and
# notes.
#
# The text is cut at every example or note marker matched by +slicer_rx+.
# The text before the first marker is the definition; the text after each
# marker is appended to @examples or @notes depending on which named group
# of the regex matched.
#
# Sets @definition (nil when the definition part is missing or empty),
# @examples and @notes.
def split_definition
  slicer_rx = %r{
    \s*
    (?:<p>\s*)?
    (
      (?<example>
        # English example
        \bEXAMPLE\b |
        ^\bExamples\s+are\b: |
        ^\bExamples\b: |
        ^\bExample\b: |
        # French examples
        \bEXEMPLE\b |
        ^\bExemples\b:
      )
      |
      (?<note>
        Note\s*\d+\sto\sentry: |
        Note&nbsp;\d+\sto\sentry: |
        Note\s*\d+\sto\sthe\sentry: |
        Note\sto\sentry\s*\d+: |
        Note\s*\d+?\sà\sl['’]article: |
        <NOTE/?>?\s*\d?\s+.*?– |
        NOTE(?:\s+-)? |
        Note\s+\d+\s– |
        Note&nbsp;\d+\s
      )
    )
    \s*
  }x

  @examples = []
  @notes = []
  definition_arr = [] # here array for consistent interface

  # next_part_arr is the array that receives the text preceding the next
  # marker; it starts as the definition and is re-pointed after each match.
  next_part_arr = definition_arr
  remaining_str = find_value_for("DEFINITION")

  # remaining_str may be nil when the column is absent, hence the &.
  while (md = remaining_str&.match(slicer_rx))
    next_part = md.pre_match
    # Undo the placeholder inserted at the end of the previous iteration.
    next_part.sub!(/^\[:Ex(a|e)mple\]/, 'Ex\\1mple')
    next_part_arr.push(next_part)
    # The marker just matched decides where the following text goes.
    next_part_arr = md[:example] ? @examples : @notes
    # 112-03-17
    # supplements the name of a quantity, especially for a component in a
    # system, to indicate the quotient of that quantity by the total
    # volume
    # <NOTE – Examples: amount-of-substance volume concentration of
    # component B (or concentration of B, in particular, ion
    # concentration), molecular concentration of B, electron concentration
    # (or electron density).
    #
    # In the above case the `Example` is part of the note but the regex
    # above will capture it as an example and will add an empty `Note`
    # and put the rest in an `Example`. So In this case we will replace
    # the `Example` with `[:Example]` and revert it in the next iteration
    # so it will not be caught by the regex.
    remaining_str = md.post_match
    remaining_str.sub!(/^Ex(a|e)mple/, '[:Ex\\1mple]') if md[:note]
  end

  # Whatever is left after the last marker belongs to the current target;
  # restore a placeholder that may still be at its start.
  remaining_str&.sub!(/^\[:Ex(a|e)mple\]/, 'Ex\\1mple')
  next_part_arr.push(remaining_str)
  @definition = definition_arr.first
  @definition = nil if @definition&.empty?
end

#term_domain ⇒ `Object`



91
92
93

# File 'lib/iev/term_builder.rb', line 91

# The first three characters of the term id, memoized in @term_domain.
#
# @return [String] leading three characters of #term_id
def term_domain
  return @term_domain if @term_domain

  @term_domain = term_id.slice(0, 3)
end

#term_hash ⇒ `Object`

# File 'lib/iev/term_builder.rb', line 50

# Assembles the attribute hash handed to
# Glossarist::LocalizedConcept.from_hash.
#
# The "PUBLICATIONDATE" column (completed by #flesh_date) feeds both the
# :accepted and :amended entries of +dates+ as well as +review_date+ and
# +review_decision_date+. Keys whose value is nil are removed from both
# the inner :data hash and the outer hash.
#
# @return [Hash] concept attributes with :id, :classification,
#   :entry_status and :data (nil-valued keys omitted)
def term_hash
  # Evaluated afresh at each use so that every field receives its own
  # string object, exactly as separate calls would produce.
  publication_date = -> { flesh_date(find_value_for("PUBLICATIONDATE")) }

  dates =
    if publication_date.call
      %i[accepted amended].map do |kind|
        { type: kind, date: publication_date.call }
      end
    end

  concept = {
    id: term_id,
    classification: extract_classification,
    entry_status: extract_entry_status,
  }

  concept[:data] = {
    id: term_id,
    dates: dates,
    definition: [{ "content" => extract_definition_value }],
    examples: extract_examples,
    notes: extract_notes,
    terms: extract_terms,
    review_date: publication_date.call,
    review_decision_date: publication_date.call,
    review_decision_event: "published",
    language_code: term_language,
    sources: extract_authoritative_source,
    related: extract_superseded_concepts,
  }.compact

  concept.compact
end

#term_id ⇒ `Object`



87
88
89

# File 'lib/iev/term_builder.rb', line 87

# The value of the "IEVREF" column, memoized in @term_id.
#
# @return [Object, nil] the sanitized IEVREF value
def term_id
  return @term_id if @term_id

  @term_id = find_value_for("IEVREF")
end

#term_language ⇒ `Object`



95
96
97

# File 'lib/iev/term_builder.rb', line 95

# The row's language, memoized in @term_language.
#
# Calls +to_three_char_code+ on the "LANGUAGE" column value; that method
# is defined outside this view.
#
# @return [Object] the result of +to_three_char_code+
def term_language
  return @term_language if @term_language

  language = find_value_for("LANGUAGE")
  @term_language = language.to_three_char_code
end

Class: Iev::TermBuilder

Constant Summary

Constants included from Utilities

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Utilities

Methods included from Cli::Ui

Constructor Details

#initialize(data) ⇒ TermBuilder

Instance Attribute Details

#data ⇒ Object (readonly)

Class Method Details

.build_from(data) ⇒ Object

Instance Method Details

#build ⇒ Object

#build_term_object ⇒ Object

#extract_authoritative_source ⇒ Object

#extract_classification ⇒ Object

#extract_definition_value ⇒ Object

#extract_entry_status ⇒ Object

#extract_examples ⇒ Object

#extract_international_symbol_designation ⇒ Object

#extract_notes ⇒ Object

#extract_primary_designation ⇒ Object

#extract_superseded_concepts ⇒ Object

#extract_synonymous_designations ⇒ Object

#extract_terms ⇒ Object

#find_value_for(key) ⇒ Object

#flesh_date(incomplete_date) ⇒ Object

#split_definition ⇒ Object

#term_domain ⇒ Object

#term_hash ⇒ Object

#term_id ⇒ Object

#term_language ⇒ Object