Class: Datasets::SudachiSynonymDictionary

Inherits:
Dataset
  • Object
show all
Defined in:
lib/datasets/sudachi-synonym-dictionary.rb

Defined Under Namespace

Classes: Synonym

Instance Attribute Summary

Attributes inherited from Dataset

#metadata

Instance Method Summary collapse

Methods inherited from Dataset

#clear_cache!, #to_table

Constructor Details

#initialize ⇒ SudachiSynonymDictionary

Returns a new instance of SudachiSynonymDictionary.


19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/datasets/sudachi-synonym-dictionary.rb', line 19

# Builds the dataset and fills in its metadata (id, name, URL, licenses).
#
# The description is assigned as a lambda that calls download_description,
# so that call is not made during construction.
def initialize
  super()
  @metadata.tap do |metadata|
    metadata.id = "sudachi-synonym-dictionary"
    metadata.name = "Sudachi synonym dictionary"
    metadata.url = "https://github.com/WorksApplications/SudachiDict/blob/develop/docs/synonyms.md"
    metadata.licenses = ["Apache-2.0"]
    metadata.description = -> { download_description }
  end
end

Instance Method Details

#each ⇒ Object


32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/datasets/sudachi-synonym-dictionary.rb', line 32

# Yields one Synonym per row of the data opened by open_data.
#
# Columns 0-8 of each row are read positionally: column 0 is passed through
# as the group id, column 1 is compared against "1" to produce the noun flag,
# columns 2-7 go through their normalize_* helpers and column 8 is passed
# through as the notation.
#
# A context hash is shared across rows. Whenever the group id differs from
# the one stored in the context, the stored group id is replaced and the
# counter is reset to 0 before normalize_lexeme_id receives the context.
#
# @yield [synonym] the Synonym built from the current row
# @return [Enumerator] when called without a block
def each
  unless block_given?
    return to_enum(__method__)
  end

  context = {}
  open_data do |csv|
    csv.each do |fields|
      current_group = fields[0]
      unless current_group == context[:group_id]
        # A new group starts: restart the per-group counter.
        context[:group_id] = current_group
        context[:counter] = 0
      end

      # Elements are evaluated top to bottom, so the helpers run in column
      # order and normalize_lexeme_id sees the context updated above.
      attributes = [
        current_group,
        fields[1] == "1",
        normalize_expansion_type(fields[2]),
        normalize_lexeme_id(fields[3], context),
        normalize_form_type(fields[4]),
        normalize_acronym_type(fields[5]),
        normalize_variant_type(fields[6]),
        normalize_categories(fields[7]),
        fields[8],
      ]
      yield(Synonym.new(*attributes))
    end
  end
end