Class: Tc211::Termbase::Term

Inherits:
Object
  • Object
show all
Defined in:
lib/tc211/termbase/term.rb

Constant Summary collapse

# Names of the attributes a Term takes as input, as symbols.
# OUTPUT_ATTRIBS is derived from this list.
INPUT_ATTRIBS =
%i(
  id
  term
  abbrev
  synonyms
  alt
  definition
  country_code
  language_code
  notes
  examples
  entry_status
  classification
  review_indicator
  authoritative_source
  authoritative_source_similarity
  lineage_source
  lineage_source_similarity
  date_accepted
  date_amended
  review_date
  review_status
  review_type
  review_decision
  review_decision_date
  review_decision_event
  review_decision_notes
  release
).freeze
# Attributes iterated by #to_hash: the input attributes minus the individual
# designation fields (term, alt, abbrev, synonyms, classification), plus the
# aggregated :terms entry.
OUTPUT_ATTRIBS =
(INPUT_ATTRIBS - %i(term alt abbrev synonyms classification) + %i(terms)).freeze
# Punctuation characters that may follow a note/example prefix. #carry_regex
# joins them into a regexp character class.
#
# NOTE(review): the first entry was a second ASCII ":"; it is restored here as
# the full-width colon (U+FF1A) on the assumption that the original character
# was lost to width normalisation — confirm against the upstream source.
STRIP_PUNCTUATION =
[
  "：",
  ":",
  ".",
  "–",
  "\-",
].freeze
EXAMPLE_PREFIXES =

WARNING Always put the longer Regexp match in front!

{
  ara: "مثال",
  dan: "EKSEMPEL",
  # deu: "",
  # TODO: fix this, we should not have "EXAMPLES"
  eng: ["EXAMPLES", "EXAMPLE"],
  fin: "ESIM",
  fra: "Exemple",
  jpn: "例",
  kor: "보기",
  msa: "Contoh",
  nld: ["VOORBEELD", "VOORBEELDEN"],
  pol: "PRZYKŁAD",
  rus: "Пример",
  spa: "Ejemplo",
  swe: "Exempel",
  zho: "示例",
}.freeze
NOTE_PREFIXES =

WARNING Always put the longer Regexp match in front!

{
  ara: "ملاحظة",
  dan: "Note",
  # deu: "",
  eng: ["Note \\d to entry", "NOTE"],
  fin: "HUOM\\.?", # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1." (numeral added by the method)
  fra: "A noter",
  jpn: "備考",
  kor: "비고",
  msa: "catatan",
  nld: "OPMERKING",
  pol: "UWAGA",
  rus: "нота",
  spa: "Nota",
  swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."],
  zho: "注",
}.freeze
# Regexp character class matching one digit, either half-width (ASCII 0-9) or
# full-width (０-９), so that numerals in Chinese and Japanese text match too.
ALL_FULL_HALF_WIDTH_NUMBERS =
"[0-9０-９]".freeze
# Maps the numeric source-similarity codes 1..6 to their status names.
# The similarity writers fall back to code 6 for values that are not keys here.
SOURCE_STATUSES =
(1..6).zip(
  %w[
    identical
    restyle
    context_added
    generalisation
    specialisation
    unspecified
  ],
).to_h.freeze

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Term

Returns a new instance of Term.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/tc211/termbase/term.rb', line 37

# Builds a Term from a hash of raw attributes.
#
# String values are stripped of surrounding whitespace; nil/false values are
# skipped. Keys starting with "example" or "note" are routed to #add_example
# and #add_note. Every other key has its dashes replaced by underscores and is
# passed to the matching "<key>=" writer.
#
# @param options [Hash] attribute name (String or Symbol) => raw value
def initialize(options = {})
  @examples = []
  @notes = []
  @definition = []

  options.each_pair do |k, v|
    v = v.strip if v.is_a?(String)
    next unless v

    # Keys may be symbols; Symbol has no #gsub, so work on the string form.
    key = k.to_s
    case key
    when /^example/
      add_example(v)
    when /^note/
      add_note(v)
    else
      writer = key.tr("-", "_")
      send("#{writer}=", v)
    end
  end
  self
end

Instance Method Details

#abbreviation_term_hash ⇒ Object



320
321
322
323
324
325
326
327
# File 'lib/tc211/termbase/term.rb', line 320

# Designation entry for the abbreviation.
#
# @return [Hash, nil] {"type" => "abbreviation", "designation" => abbrev},
#   or nil when no abbreviation is set
def abbreviation_term_hash
  designation = abbrev
  return nil unless designation

  { "type" => "abbreviation", "designation" => designation }
end

#add_example(example) ⇒ Object



123
124
125
126
# File 'lib/tc211/termbase/term.rb', line 123

# Strips any known example prefix from +example+ and appends what remains to
# the examples list. Nothing is stored when the cleaned text is empty.
#
# @param example [Object] raw example text
# @return [Array, nil] the examples list after appending, or nil if skipped
def add_example(example)
  cleaned = clean_prefixed_string(example, EXAMPLE_PREFIXES)
  return if cleaned.empty?

  @examples << cleaned
end

#add_note(note) ⇒ Object



128
129
130
131
# File 'lib/tc211/termbase/term.rb', line 128

# Strips any known note prefix from +note+ and appends what remains to the
# notes list. Nothing is stored when the cleaned text is empty.
#
# @param note [Object] raw note text
# @return [Array, nil] the notes list after appending, or nil if skipped
def add_note(note)
  cleaned = clean_prefixed_string(note, NOTE_PREFIXES)
  return if cleaned.empty?

  @notes << cleaned
end

#alt_term_hash ⇒ Object



310
311
312
313
314
315
316
317
318
# File 'lib/tc211/termbase/term.rb', line 310

# Designation entry for the alternative term; it carries the term's
# classification as its normative status.
#
# @return [Hash, nil] nil when no alternative term is set
def alt_term_hash
  designation = alt
  return nil unless designation

  entry = { "type" => "expression" }
  entry["designation"] = designation
  entry["normative_status"] = classification
  entry
end

#authoritative_source=(source) ⇒ Object



214
215
216
217
# File 'lib/tc211/termbase/term.rb', line 214

# Stores the authoritative source after cleaning it in place with
# #clean_source! (the passed object itself is modified and kept).
#
# @param source [Hash, String] the source reference
def authoritative_source=(source)
  @authoritative_source = source.tap { |raw| clean_source!(raw) }
end

#authoritative_source_array ⇒ Object



359
360
361
362
363
364
365
# File 'lib/tc211/termbase/term.rb', line 359

# Wraps the authoritative source's "link" entry in a one-element array.
#
# @return [Array<Hash>, nil] [{"link" => ...}], or nil when no authoritative
#   source is set
def authoritative_source_array
  source = authoritative_source
  return nil unless source

  [{ "link" => source["link"] }]
end

#authoritative_source_hash ⇒ Object



345
346
347
348
349
350
351
352
353
354
355
356
357
# File 'lib/tc211/termbase/term.rb', line 345

# Describes the authoritative source as a source entry.
#
# The origin's link/ref/clause are read from the source by string key; the
# status is the SOURCE_STATUSES name for the stored similarity code.
#
# @return [Hash, nil] nil when no authoritative source is set
def authoritative_source_hash
  source = authoritative_source
  return nil unless source

  origin = %w[link ref clause].map { |field| [field.to_sym, source[field]] }.to_h

  {
    origin: origin,
    type: "authoritative",
    status: SOURCE_STATUSES[authoritative_source_similarity],
  }
end

#authoritative_source_similarity=(value) ⇒ Object

authoritative-source-similarity

Must be one of the following codes:
  identical = 1
  restyled = 2
  context added = 3
  generalisation = 4
  specialisation = 5
  unspecified = 6


227
228
229
230
231
232
# File 'lib/tc211/termbase/term.rb', line 227

# Stores the similarity code of the authoritative source. Any value that is
# not a key of SOURCE_STATUSES is replaced by 6 ("unspecified").
#
# @param value [Object] similarity code
def authoritative_source_similarity=(value)
  @authoritative_source_similarity = SOURCE_STATUSES.key?(value) ? value : 6
end

#carry_regex(mat) ⇒ Object



143
144
145
146
147
148
149
150
151
# File 'lib/tc211/termbase/term.rb', line 143

# Builds the regexp that strips a note/example prefix from the start of a
# string: the prefix, optional punctuation, an optional (possibly
# parenthesised) number in half- or full-width digits, and optional
# punctuation again, with whitespace allowed between the parts.
#
# @param mat [String] regexp fragment for the prefix (interpolated verbatim)
# @return [Regexp]
def carry_regex(mat)
  punctuation = "[#{STRIP_PUNCTUATION.join}]?"
  numbering = "\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?"

  # "\\s" is deliberate: inside a double-quoted Ruby string a bare "\s" is a
  # literal space, which would make the pattern miss tabs and newlines.
  # "\\A" anchors at the start of the string; "^" would also match at the
  # start of any later line of a multi-line text.
  Regexp.new(
    [
      "\\A#{mat}\\s*#{punctuation}",
      "\\s*#{numbering}\\s*",
      "#{punctuation}\\s*",
    ].join,
  )
end

#classification=(value) ⇒ Object

classification: must be one of the following: preferred, admitted, deprecated.



191
192
193
194
195
196
197
198
199
200
201
# File 'lib/tc211/termbase/term.rb', line 191

# Stores the classification, translating known localized labels to
# "admitted" or "preferred". An empty string is treated as "admitted".
# Any other value, including the canonical "preferred", "admitted" and
# "deprecated", is stored unchanged; no validation is performed.
#
# @param value [Object] raw classification label
def classification=(value)
  @classification =
    case value
    when "", "认可的", "допустимый", "admitido", "adminitido"
      "admitted"
    when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
      "preferred"
    else
      # The former `when %w(preferred admitted deprecated)` branch compared an
      # Array against the value and could never match; canonical values simply
      # pass through here.
      value
    end
end

#clean_prefixed_string(string, criterion_map) ⇒ Object



133
134
135
136
137
138
139
140
141
# File 'lib/tc211/termbase/term.rb', line 133

# Removes known note/example prefixes (plus trailing numbering and
# punctuation, see #carry_regex) from the start of a string.
#
# Every prefix in the map is applied in turn. They are tried longest first,
# with ties keeping map order, so that a short prefix such as "Note" cannot
# consume the beginning of a longer one such as "Note \d to entry",
# whatever order the map lists them in.
#
# @param string [Object] raw text; converted with #to_s and stripped
# @param criterion_map [Hash] language => prefix fragment or array of fragments
# @return [String] the text without its prefix
def clean_prefixed_string(string, criterion_map)
  prefixes = criterion_map.values.flatten
  longest_first = prefixes.each_with_index
    .sort_by { |mat, position| [-mat.length, position] }
    .map(&:first)

  # Arabic notes/examples sometimes use parentheses around numbers
  longest_first.reduce(string.to_s.strip) do |carry, mat|
    carry.sub(carry_regex(mat), "")
  end
end

#clean_source!(source) ⇒ Object



254
255
256
257
258
259
260
261
# File 'lib/tc211/termbase/term.rb', line 254

# Removes the "(E)" marker, with an optional following comma and whitespace,
# from a source reference. The argument is modified in place.
#
# For a Hash only the "ref" and "clause" entries are cleaned, and only when
# they are Strings. A String is cleaned directly. Anything else (e.g. nil) is
# left alone instead of raising NoMethodError.
#
# @param source [Hash, String, nil] source reference to clean
# @return [void]
def clean_source!(source)
  marker = /\(E\),?\s*/

  case source
  when Hash
    %w[ref clause].each do |field|
      text = source[field]
      text.gsub!(marker, "") if text.is_a?(String)
    end
  when String
    source.gsub!(marker, "")
  end
  nil
end

#definition=(definition) ⇒ Object



159
160
161
# File 'lib/tc211/termbase/term.rb', line 159

# Appends +definition+ to the list of definitions. Despite the writer syntax,
# this adds to the existing list rather than replacing it.
#
# @param definition [Object] definition to append
def definition=(definition)
  @definition.push(definition)
end

#entry_status=(value) ⇒ Object

entry-status: must be one of notValid, valid, superseded, retired.



176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/tc211/termbase/term.rb', line 176

# Stores the entry status, translating known localized labels to "valid",
# "superseded" or "retired". Any other value, including the canonical
# "notValid", "valid", "superseded" and "retired", is stored unchanged; no
# validation is performed.
#
# @param value [Object] raw entry status label
def entry_status=(value)
  @entry_status =
    case value
    when "有效的", "käytössä", "действующий", "válido"
      "valid"
    when "korvattu", "reemplazado"
      "superseded"
    when "информация отсутствует" # "information absent"!?
      "retired"
    else
      # The former `when %w(notValid valid superseded retired)` branch compared
      # an Array against the value and could never match; canonical values
      # simply pass through here.
      value
    end
end

#id=(newid) ⇒ Object

The termid should ALWAYS be an integer. github.com/riboseinc/tc211-termbase/issues/1



155
156
157
# File 'lib/tc211/termbase/term.rb', line 155

# Stores the term id as an Integer.
#
# String ids are parsed strictly in base 10, so a zero-padded id such as
# "010" becomes 10 rather than being read as octal 8, and prefixed forms
# such as "0x1A" are rejected. Non-string values are converted with
# Kernel#Integer as before.
#
# @param newid [String, Numeric] the id to store
# @raise [ArgumentError] if a String is not a valid decimal integer
# @raise [TypeError] if the value cannot be converted (e.g. nil)
def id=(newid)
  @id = newid.is_a?(String) ? Integer(newid, 10) : Integer(newid)
end

#lineage_source=(source) ⇒ Object



234
235
236
237
# File 'lib/tc211/termbase/term.rb', line 234

# Stores the lineage source after cleaning it in place with #clean_source!
# (the passed object itself is modified and kept).
#
# @param source [Hash, String] the source reference
def lineage_source=(source)
  @lineage_source = source.tap { |raw| clean_source!(raw) }
end

#lineage_source_hash ⇒ Object



367
368
369
370
371
372
373
374
375
376
377
# File 'lib/tc211/termbase/term.rb', line 367

# Describes the lineage source as a source entry. The whole lineage source
# value is used as the origin's :ref; the status is the SOURCE_STATUSES name
# for the stored similarity code.
#
# @return [Hash, nil] nil when no lineage source is set
def lineage_source_hash
  source = lineage_source
  return nil unless source

  status = SOURCE_STATUSES[lineage_source_similarity]
  { origin: { ref: source }, type: "lineage", status: status }
end

#lineage_source_similarity=(value) ⇒ Object

lineage-source-similarity

Must be one of the following codes:
  identical = 1
  restyled = 2
  context added = 3
  generalisation = 4
  specialisation = 5
  unspecified = 6


247
248
249
250
251
252
# File 'lib/tc211/termbase/term.rb', line 247

# Stores the similarity code of the lineage source. Any value that is not a
# key of SOURCE_STATUSES is replaced by 6 ("unspecified").
#
# @param value [Object] similarity code
def lineage_source_similarity=(value)
  code = SOURCE_STATUSES.include?(value) ? value : 6
  @lineage_source_similarity = code
end

#primary_term_hash ⇒ Object



300
301
302
303
304
305
306
307
308
# File 'lib/tc211/termbase/term.rb', line 300

# Designation entry for the primary term; it carries the term's
# classification as its normative status.
#
# @return [Hash, nil] nil when no term is set
def primary_term_hash
  designation = term
  return nil unless designation

  entry = { "type" => "expression" }
  entry["designation"] = designation
  entry["normative_status"] = classification
  entry
end

#retired? ⇒ Boolean

Returns:

  • (Boolean)


287
288
289
# File 'lib/tc211/termbase/term.rb', line 287

# Reports whether the release value is zero or greater.
#
# NOTE(review): the method name suggests "retired", but the code only checks
# `release >= 0`; how release numbers encode retirement is not visible here —
# confirm the intended direction of the comparison.
#
# @return [Boolean]
def retired?
  current_release = release
  current_release >= 0
end

#review_decision=(value) ⇒ Object

value: must be one of withdrawn, accepted, notAccepted.



280
281
282
283
284
285
# File 'lib/tc211/termbase/term.rb', line 280

# Stores the review decision. Values other than "", "withdrawn", "accepted"
# and "notAccepted" are replaced by the empty string.
#
# @param value [Object] review decision
def review_decision=(value)
  permitted = ["", "withdrawn", "accepted", "notAccepted"]
  @review_decision = permitted.include?(value) ? value : ""
end

#review_indicator=(value) ⇒ Object

review-indicator

Must be one of the following
  <empty field>
  Under Review in Source Document


207
208
209
210
211
212
# File 'lib/tc211/termbase/term.rb', line 207

# Stores the review indicator. Only "" and "Under Review in Source Document"
# are accepted; anything else is replaced by the empty string.
#
# @param value [Object] review indicator
def review_indicator=(value)
  recognised = ["", "Under Review in Source Document"].include?(value)
  @review_indicator = recognised ? value : ""
end

#review_status=(value) ⇒ Object

value: must be one of pending, tentative, final.



264
265
266
267
268
269
# File 'lib/tc211/termbase/term.rb', line 264

# Stores the review status. Values other than "", "pending", "tentative" and
# "final" are replaced by the empty string.
#
# @param value [Object] review status
def review_status=(value)
  @review_status =
    if ["", "pending", "tentative", "final"].include?(value)
      value
    else
      ""
    end
end

#review_type=(value) ⇒ Object

value: must be one of supersession, retirement.



272
273
274
275
276
277
# File 'lib/tc211/termbase/term.rb', line 272

# Stores the review type. Values other than "", "supersession" and
# "retirement" are replaced by the empty string.
#
# @param value [Object] review type
def review_type=(value)
  known_types = ["", "supersession", "retirement"]
  value = "" unless known_types.include?(value)
  @review_type = value
end

#sources_hash ⇒ Object



338
339
340
341
342
343
# File 'lib/tc211/termbase/term.rb', line 338

# Collects the source entries that are present, authoritative first and then
# lineage. Despite the name, the result is an Array.
#
# @return [Array<Hash>] possibly empty
def sources_hash
  entries = [authoritative_source_hash, lineage_source_hash]
  entries.reject(&:nil?)
end

#synonyms_term_hash ⇒ Object



329
330
331
332
333
334
335
336
# File 'lib/tc211/termbase/term.rb', line 329

# Designation entry for the synonyms value (no normative status is attached).
#
# @return [Hash, nil] {"type" => "expression", "designation" => synonyms},
#   or nil when no synonyms are set
def synonyms_term_hash
  designation = synonyms
  return nil unless designation

  { "type" => "expression", "designation" => designation }
end

#terms ⇒ Object



291
292
293
294
295
296
297
298
# File 'lib/tc211/termbase/term.rb', line 291

# Lists the designation entries that are present, in the order: primary term,
# alternative term, abbreviation, synonyms.
#
# @return [Array<Hash>] possibly empty
def terms
  designations = []
  designations << primary_term_hash
  designations << alt_term_hash
  designations << abbreviation_term_hash
  designations << synonyms_term_hash
  designations.compact
end

#to_hash ⇒ Object



163
164
165
166
167
168
169
170
171
172
# File 'lib/tc211/termbase/term.rb', line 163

# Serialises the term to a Hash keyed by attribute name (as a String), one
# entry per OUTPUT_ATTRIBS attribute in that order. Attributes whose value is
# nil are left out.
#
# @return [Hash{String => Object}]
def to_hash
  OUTPUT_ATTRIBS.each_with_object({}) do |attrib, result|
    value = send(attrib)
    next if value.nil?

    result[attrib.to_s] = value
  end
end

#to_localized_concept_hash ⇒ Object



379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# File 'lib/tc211/termbase/term.rb', line 379

# Builds the localized-concept representation from #to_hash:
# - removes the review, source and country-code keys listed below,
# - converts "id" to a String,
# - adds "sources" from #sources_hash,
# - adds "authoritativeSource" when #authoritative_source_array is non-nil.
#
# @return [Hash{String => Object}]
def to_localized_concept_hash
  dropped_keys = %w[
    review_status
    review_decision
    review_decision_notes
    review_indicator
    authoritative_source
    authoritative_source_similarity
    lineage_source
    lineage_source_similarity
    country_code
  ]

  localized = to_hash
  dropped_keys.each { |name| localized.delete(name) }

  localized["id"] = localized["id"].to_s
  localized["sources"] = sources_hash

  links = authoritative_source_array
  localized["authoritativeSource"] = links if links

  localized
end