Class: Tc211::Termbase::Term

Inherits:
Object
  • Object
show all
Defined in:
lib/tc211/termbase/term.rb

Constant Summary collapse

# Names of the attributes a term carries on input, as symbols.
INPUT_ATTRIBS =
%i(
  id
  term
  abbrev
  synonyms
  alt
  definition
  country_code
  language_code
  notes
  examples
  entry_status
  classification
  review_indicator
  authoritative_source
  authoritative_source_similarity
  lineage_source
  lineage_source_similarity
  date_accepted
  date_amended
  review_date
  review_status
  review_type
  review_decision
  review_decision_date
  review_decision_event
  review_decision_notes
  release
).freeze
# Attributes serialised by #to_hash: the input attributes with the individual
# designation fields (and the classification) replaced by the aggregated
# +terms+ list.
OUTPUT_ATTRIBS =
(INPUT_ATTRIBS - %i(term alt abbrev synonyms classification) + %i(terms)).freeze
# Punctuation marks that #carry_regex joins into a character class so they
# are stripped together with a note/example prefix.
#
# NOTE(review): the list previously contained ":" twice; the first entry is
# restored here as the full-width colon on the assumption that it was lost to
# width normalisation - confirm against the upstream file.
STRIP_PUNCTUATION =
[
  "：", # full-width colon (U+FF1A)
  ":",
  ".",
  "–", # en dash
  "-", # plain hyphen ("\-" in a double-quoted literal is the same string)
].freeze
# Per-language prefixes that introduce an example. #add_example hands this map
# to #clean_prefixed_string, which interpolates every value into a Regexp and
# strips the first match, trying the values in the order listed.
#
# WARNING: Always put the longer Regexp match in front! A shorter prefix that
# is tried first strips only part of the longer one (e.g. "VOORBEELD" would
# leave "EN" behind from "VOORBEELDEN").
EXAMPLE_PREFIXES =
{
  # TODO: fix this, we should not have "EXAMPLES"
  eng: ["EXAMPLES", "EXAMPLE"],
  ara: "مثال",
  chi: "示例",
  dan: "EKSEMPEL",
  dut: ["VOORBEELDEN", "VOORBEELD"],
  fin: "ESIM",
  fre: "Exemple",
  # ger: "",
  jpn: "例",
  kor: "보기",
  pol: "PRZYKŁAD",
  may: "Contoh",
  rus: "Пример",
  spa: "Ejemplo",
  swe: "Exempel",
}.freeze
# Per-language prefixes that introduce a note. #add_note hands this map to
# #clean_prefixed_string, which interpolates every value into a Regexp (so
# "\\d" and "\\.?" below are regex syntax) and tries them in the order listed.
#
# WARNING: Always put the longer Regexp match in front!
#
# NOTE(review): the "." in the Swedish "Anm." entries is not escaped, so it
# matches any character - confirm whether that is intended.
NOTE_PREFIXES =
{
  eng: ["Note \\d to entry", "NOTE"],
  ara: "ملاحظة",
  chi: "注",
  dan: "Note",
  dut: "OPMERKING",
  # Matches "HUOM", "HUOM.", "HUOM 1." and "HUOM. 1."
  # (numeral added by the method)
  fin: "HUOM\\.?",
  fre: "A noter",
  # ger: "",
  jpn: "備考",
  kor: "비고",
  pol: "UWAGA",
  may: "catatan",
  rus: "нота",
  spa: "Nota",
  swe: ["Anm. \\d till termpost", "Anm. \\d till terpost", "Anm."],
}.freeze
# Regexp character class (as a string, for interpolation) matching one digit
# in either half-width (ASCII 0-9) or full-width (U+FF10..U+FF19) form, so
# that Chinese and Japanese numerals are matched too.
ALL_FULL_HALF_WIDTH_NUMBERS =
"[0-9０-９]".freeze
# Maps a numeric source-similarity code to its status label. Codes are
# assigned consecutively from 1 in the order the labels are listed.
SOURCE_STATUSES =
  %w[
    identical
    restyle
    context_added
    generalisation
    specialisation
    unspecified
  ].each.with_index(1).map { |label, code| [code, label] }.to_h.freeze

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Term

Returns a new instance of Term.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/tc211/termbase/term.rb', line 37

# Builds a term from a hash of raw attributes.
#
# Keys starting with "example" or "note" are routed to #add_example /
# #add_note. Every other key has its dashes turned into underscores and is
# assigned through the writer of that name (e.g. "entry-status" goes to
# #entry_status=). String values are stripped first; nil/false values are
# skipped.
#
# @param options [Hash] raw attributes keyed by String or Symbol
def initialize(options = {})
  @examples = []
  @notes = []
  @definition = []

  options.each_pair do |raw_key, raw_value|
    value = raw_value.is_a?(String) ? raw_value.strip : raw_value
    next unless value

    # Normalise to a String so Symbol keys work too (Symbol has no #gsub).
    key = raw_key.to_s

    case key
    when /^example/
      add_example(value)
    when /^note/
      add_note(value)
    else
      send("#{key.tr('-', '_')}=", value)
    end
  end
end

Instance Method Details

#abbreviation_term_hash ⇒ Object



322
323
324
325
326
327
328
329
# File 'lib/tc211/termbase/term.rb', line 322

# Describes the abbreviation as a designation entry.
#
# @return [Hash, nil] "abbreviation"-typed designation hash, or nil when no
#   abbreviation is set
def abbreviation_term_hash
  if abbrev
    designation = {}
    designation["type"] = "abbreviation"
    designation["designation"] = abbrev
    designation
  end
end

#add_example(example) ⇒ Object



125
126
127
128
# File 'lib/tc211/termbase/term.rb', line 125

# Strips any known example prefix from +example+ and records what is left.
# Nothing is stored when the cleaned text is empty.
#
# @param example [Object] raw example text (cleaned via #clean_prefixed_string)
def add_example(example)
  cleaned = clean_prefixed_string(example, EXAMPLE_PREFIXES)
  return if cleaned.empty?

  @examples << cleaned
end

#add_note(note) ⇒ Object



130
131
132
133
# File 'lib/tc211/termbase/term.rb', line 130

# Strips any known note prefix from +note+ and records what is left.
# Nothing is stored when the cleaned text is empty.
#
# @param note [Object] raw note text (cleaned via #clean_prefixed_string)
def add_note(note)
  cleaned = clean_prefixed_string(note, NOTE_PREFIXES)
  return if cleaned.empty?

  @notes << cleaned
end

#alt_term_hash ⇒ Object



312
313
314
315
316
317
318
319
320
# File 'lib/tc211/termbase/term.rb', line 312

# Describes the alternative term as an expression designation that carries
# this term's classification as its normative status.
#
# @return [Hash, nil] designation hash, or nil when no alternative term is set
def alt_term_hash
  if alt
    designation = {}
    designation["type"] = "expression"
    designation["designation"] = alt
    designation["normative_status"] = classification
    designation
  end
end

#authoritative_source=(source) ⇒ Object



216
217
218
219
# File 'lib/tc211/termbase/term.rb', line 216

# Stores the authoritative source after cleaning it in place with
# #clean_source! (the object passed in is the one that gets stored).
#
# @param source [Object] source reference handed to #clean_source!
def authoritative_source=(source)
  @authoritative_source = source.tap { |raw| clean_source!(raw) }
end

#authoritative_source_array ⇒ Object



361
362
363
364
365
366
367
# File 'lib/tc211/termbase/term.rb', line 361

# Wraps the authoritative source's "link" entry in a one-element array.
#
# @return [Array<Hash>, nil] [{ "link" => ... }], or nil when no
#   authoritative source is set
def authoritative_source_array
  source = authoritative_source
  return unless source

  link_entry = { "link" => source["link"] }
  [link_entry]
end

#authoritative_source_hash ⇒ Object



347
348
349
350
351
352
353
354
355
356
357
358
359
# File 'lib/tc211/termbase/term.rb', line 347

# Describes the authoritative source: its origin (link, ref, clause), the
# fixed type "authoritative", and the status label looked up in
# SOURCE_STATUSES by the stored similarity code.
#
# @return [Hash, nil] symbol-keyed source description, or nil when no
#   authoritative source is set
def authoritative_source_hash
  source = authoritative_source
  return unless source

  origin = {
    link: source["link"],
    ref: source["ref"],
    clause: source["clause"],
  }

  {
    origin: origin,
    type: "authoritative",
    status: SOURCE_STATUSES[authoritative_source_similarity],
  }
end

#authoritative_source_similarity=(value) ⇒ Object

authoritative-source-similarity

Must be one of the following codes:
  identical = 1
  restyled = 2
  context added = 3
  generalisation = 4
  specialisation = 5
  unspecified = 6


229
230
231
232
233
234
# File 'lib/tc211/termbase/term.rb', line 229

# Stores the similarity code of the authoritative source. Any value that is
# not a key of SOURCE_STATUSES is replaced by 6.
#
# @param value [Object] candidate similarity code
def authoritative_source_similarity=(value)
  @authoritative_source_similarity = SOURCE_STATUSES.key?(value) ? value : 6
end

#carry_regex(mat) ⇒ Object



145
146
147
148
149
150
151
152
153
# File 'lib/tc211/termbase/term.rb', line 145

# Builds the Regexp that strips a note/example prefix from the start of a
# string: the prefix itself, then optional punctuation, an optional number
# (optionally parenthesised, half- or full-width digits) and optional
# punctuation again, with whitespace allowed between each part.
#
# @param mat [String] prefix pattern; interpolated as regex source, so it may
#   itself contain regex syntax such as "\\d"
# @return [Regexp]
def carry_regex(mat)
  # Escape each mark so the character class stays valid whatever the order of
  # STRIP_PUNCTUATION (an unescaped "-" in the middle would form a range).
  punctuation = "[#{STRIP_PUNCTUATION.map { |mark| Regexp.escape(mark) }.join}]?"
  numbering = "\\(?#{ALL_FULL_HALF_WIDTH_NUMBERS}*\\)?"

  # "\\s" is the regex whitespace class; a bare "\s" inside a double-quoted
  # Ruby string is just a space. \A anchors at the start of the string,
  # whereas ^ would also match at the start of any later line.
  Regexp.new("\\A#{mat}\\s*#{punctuation}\\s*#{numbering}\\s*#{punctuation}\\s*")
end

#classification=(value) ⇒ Object

classification: must be one of the following: preferred, admitted, deprecated.



193
194
195
196
197
198
199
200
201
202
203
# File 'lib/tc211/termbase/term.rb', line 193

# Stores the classification, translating the known localized (and blank)
# spellings to their English value. Every other value - including the
# canonical "preferred", "admitted" and "deprecated" - is stored unchanged;
# no validation is performed.
#
# @param value [String] raw classification
def classification=(value)
  @classification =
    case value
    when "", "认可的", "допустимый", "admitido", "adminitido"
      "admitted"
    when "首选的", "suositettava", "suositeltava", "рекомендуемый", "preferente"
      "preferred"
    else
      # The former `when %w(preferred admitted deprecated)` branch compared
      # the value against the whole Array and so could never match a String;
      # pass-through is the behaviour it was meant to express.
      value
    end
end

#clean_prefixed_string(string, criterion_map) ⇒ Object



135
136
137
138
139
140
141
142
143
# File 'lib/tc211/termbase/term.rb', line 135

# Removes known prefixes from the start of +string+.
#
# The input is converted to a String and stripped, then every prefix in
# +criterion_map+ (values flattened, in order) is applied once through
# #carry_regex. The regex also covers numbering, including the parentheses
# that Arabic notes/examples sometimes put around numbers.
#
# @param string [Object] raw text (nil becomes "")
# @param criterion_map [Hash] language => prefix or list of prefixes
# @return [String] the text with matching prefixes removed
def clean_prefixed_string(string, criterion_map)
  prefixes = criterion_map.values.flatten

  prefixes.inject(string.to_s.strip) do |remaining, prefix|
    remaining.sub(carry_regex(prefix), "")
  end
end

#clean_source!(source) ⇒ Object



256
257
258
259
260
261
262
263
# File 'lib/tc211/termbase/term.rb', line 256

# Removes the "(E)" edition marker (with an optional following comma and
# whitespace) from a source reference, modifying it in place.
#
# For a Hash the "ref" and "clause" values are cleaned; any other source is
# cleaned directly. Only String values are touched, so nil, numeric clauses
# and the like are left alone instead of raising NoMethodError.
#
# @param source [Hash, String, nil] source reference to clean in place
def clean_source!(source)
  strip_marker = lambda do |text|
    text.gsub!(/\(E\),?\s*/, "") if text.is_a?(String)
  end

  if source.is_a?(Hash)
    strip_marker.call(source["ref"])
    strip_marker.call(source["clause"])
  else
    strip_marker.call(source)
  end
end

#definition=(definition) ⇒ Object



161
162
163
# File 'lib/tc211/termbase/term.rb', line 161

# Appends +definition+ to the list of definitions; despite being a writer it
# adds to the existing list rather than replacing it.
#
# @param definition [Object] definition to add
def definition=(definition)
  @definition.push(definition)
end

#entry_status=(value) ⇒ Object

entry-status: must be one of notValid, valid, superseded, retired.



178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/tc211/termbase/term.rb', line 178

# Stores the entry status, translating the known localized spellings to their
# English value. Every other value - including the canonical "notValid",
# "valid", "superseded" and "retired" - is stored unchanged; no validation is
# performed.
#
# @param value [String] raw entry status
def entry_status=(value)
  @entry_status =
    case value
    when "有效的", "käytössä", "действующий", "válido"
      "valid"
    when "korvattu", "reemplazado"
      "superseded"
    when "информация отсутствует" # "information absent"!?
      "retired"
    else
      # The former `when %w(notValid valid superseded retired)` branch
      # compared the value against the whole Array and so could never match a
      # String; pass-through is the behaviour it was meant to express.
      value
    end
end

#id=(newid) ⇒ Object

The term ID should ALWAYS be an integer. See https://github.com/riboseinc/tc211-termbase/issues/1



157
158
159
# File 'lib/tc211/termbase/term.rb', line 157

# Stores the term ID as an Integer.
#
# Strings are parsed strictly as base 10, so a leading zero is not read as an
# octal prefix ("010" is 10, "08" is 8). Non-String values go through the
# plain Integer() conversion.
#
# @param newid [String, Numeric] the term ID
# @raise [ArgumentError] when a String is not a valid decimal integer
# @raise [TypeError] when the value cannot be converted (e.g. nil)
def id=(newid)
  # Integer() rejects an explicit base for non-String arguments.
  @id = newid.is_a?(String) ? Integer(newid, 10) : Integer(newid)
end

#lineage_source=(source) ⇒ Object



236
237
238
239
# File 'lib/tc211/termbase/term.rb', line 236

# Stores the lineage source after cleaning it in place with #clean_source!
# (the object passed in is the one that gets stored).
#
# @param source [Object] source reference handed to #clean_source!
def lineage_source=(source)
  @lineage_source = source.tap { |raw| clean_source!(raw) }
end

#lineage_source_hash ⇒ Object



369
370
371
372
373
374
375
376
377
378
379
# File 'lib/tc211/termbase/term.rb', line 369

# Describes the lineage source: the stored value as the origin's ref, the
# fixed type "lineage", and the status label looked up in SOURCE_STATUSES by
# the stored similarity code.
#
# @return [Hash, nil] symbol-keyed source description, or nil when no lineage
#   source is set
def lineage_source_hash
  source = lineage_source
  return unless source

  origin = { ref: source }

  {
    origin: origin,
    type: "lineage",
    status: SOURCE_STATUSES[lineage_source_similarity],
  }
end

#lineage_source_similarity=(value) ⇒ Object

lineage-source-similarity

Must be one of the following codes:
  identical = 1
  restyled = 2
  context added = 3
  generalisation = 4
  specialisation = 5
  unspecified = 6


249
250
251
252
253
254
# File 'lib/tc211/termbase/term.rb', line 249

# Stores the similarity code of the lineage source. Any value that is not a
# key of SOURCE_STATUSES is replaced by 6.
#
# @param value [Object] candidate similarity code
def lineage_source_similarity=(value)
  @lineage_source_similarity = SOURCE_STATUSES.key?(value) ? value : 6
end

#primary_term_hash ⇒ Object



302
303
304
305
306
307
308
309
310
# File 'lib/tc211/termbase/term.rb', line 302

# Describes the primary term as an expression designation that carries this
# term's classification as its normative status.
#
# @return [Hash, nil] designation hash, or nil when no term is set
def primary_term_hash
  if term
    designation = {}
    designation["type"] = "expression"
    designation["designation"] = term
    designation["normative_status"] = classification
    designation
  end
end

#retired? ⇒ Boolean

Returns:

  • (Boolean)


289
290
291
# File 'lib/tc211/termbase/term.rb', line 289

# Reports whether +release+ is greater than or equal to zero.
#
# NOTE(review): the only thing visible here is the comparison on +release+;
# how that relates to a term being "retired" should be confirmed against the
# data. Presumably +release+ is numeric - an unset (nil) release has no #>=
# and would raise here.
#
# @return [Boolean]
def retired?
  release >= 0
end

#review_decision=(value) ⇒ Object

value: must be one of withdrawn, accepted, notAccepted.



282
283
284
285
286
287
# File 'lib/tc211/termbase/term.rb', line 282

# Stores the review decision; anything outside the allowed values is stored
# as an empty string.
#
# @param value [Object] candidate review decision
def review_decision=(value)
  allowed = ["", "withdrawn", "accepted", "notAccepted"]
  @review_decision = allowed.include?(value) ? value : ""
end

#review_indicator=(value) ⇒ Object

review-indicator

Must be one of the following
  <empty field>
  Under Review in Source Document


209
210
211
212
213
214
# File 'lib/tc211/termbase/term.rb', line 209

# Stores the review indicator; anything outside the allowed values is stored
# as an empty string.
#
# @param value [Object] candidate review indicator
def review_indicator=(value)
  allowed = ["", "Under Review in Source Document"]
  @review_indicator = allowed.include?(value) ? value : ""
end

#review_status=(value) ⇒ Object

value: must be one of pending, tentative, final.



266
267
268
269
270
271
# File 'lib/tc211/termbase/term.rb', line 266

# Stores the review status; anything outside the allowed values is stored as
# an empty string.
#
# @param value [Object] candidate review status
def review_status=(value)
  allowed = ["", "pending", "tentative", "final"]
  @review_status = allowed.include?(value) ? value : ""
end

#review_type=(value) ⇒ Object

value: must be one of supersession, retirement.



274
275
276
277
278
279
# File 'lib/tc211/termbase/term.rb', line 274

# Stores the review type; anything outside the allowed values is stored as an
# empty string.
#
# @param value [Object] candidate review type
def review_type=(value)
  allowed = ["", "supersession", "retirement"]
  @review_type = allowed.include?(value) ? value : ""
end

#sources_hash ⇒ Object



340
341
342
343
344
345
# File 'lib/tc211/termbase/term.rb', line 340

# Collects the source descriptions that are present, authoritative first and
# lineage second. Despite the name, the result is an Array.
#
# @return [Array<Hash>] the non-nil source hashes
def sources_hash
  candidates = [authoritative_source_hash, lineage_source_hash]
  candidates.reject(&:nil?)
end

#synonyms_term_hash ⇒ Object



331
332
333
334
335
336
337
338
# File 'lib/tc211/termbase/term.rb', line 331

# Describes the synonyms value as an expression designation (no normative
# status is attached).
#
# @return [Hash, nil] designation hash, or nil when no synonyms are set
def synonyms_term_hash
  if synonyms
    designation = {}
    designation["type"] = "expression"
    designation["designation"] = synonyms
    designation
  end
end

#terms ⇒ Object



293
294
295
296
297
298
299
300
# File 'lib/tc211/termbase/term.rb', line 293

# Lists the designations that are present, in a fixed order: primary term,
# alternative term, abbreviation, synonyms.
#
# @return [Array<Hash>] the non-nil designation hashes
def terms
  builders = %i[
    primary_term_hash
    alt_term_hash
    abbreviation_term_hash
    synonyms_term_hash
  ]

  builders.map { |builder| send(builder) }.compact
end

#to_hash ⇒ Object



165
166
167
168
169
170
171
172
173
174
# File 'lib/tc211/termbase/term.rb', line 165

# Serialises the term: each attribute in OUTPUT_ATTRIBS is read through its
# reader and stored under the attribute's name as a String key. Attributes
# whose value is nil are left out (false is kept).
#
# @return [Hash{String => Object}]
def to_hash
  OUTPUT_ATTRIBS.each_with_object({}) do |attrib, result|
    value = send(attrib)
    result[attrib.to_s] = value unless value.nil?
  end
end

#to_localized_concept_hash ⇒ Object



381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
# File 'lib/tc211/termbase/term.rb', line 381

# Builds the localized-concept form of the term from #to_hash:
# review, raw source and country-code entries are dropped, "id" is turned
# into a String, the source descriptions are added under "sources", and the
# authoritative source links are added under "authoritativeSource" when an
# authoritative source exists.
#
# @return [Hash{String => Object}]
def to_localized_concept_hash
  dropped_keys = %w[
    review_status
    review_decision
    review_decision_notes
    review_indicator
    authoritative_source
    authoritative_source_similarity
    lineage_source
    lineage_source_similarity
    country_code
  ]

  concept_hash = to_hash.reject { |key, _value| dropped_keys.include?(key) }

  concept_hash["id"] = concept_hash["id"].to_s
  concept_hash["sources"] = sources_hash

  source_links = authoritative_source_array
  concept_hash["authoritativeSource"] = source_links if source_links

  concept_hash
end