Class: TrieTree

Inherits:
Object
  • Object
show all
Defined in:
lib/trie_suggest.rb

Overview

Trie tree class

Defined Under Namespace

Classes: TrieNode

Constant Summary collapse

DEFAULT =
0
SPACE =
1
KANJI =
2
SYMBOL =
3
NUMERIC =
4
ALPHA =
5
HIRAGANA =
6
KATAKANA =
7
KANJINUMERIC =

漢字数字

8
GREEK =

ギリシャ文字

9
CYRILLIC =

キリル文字

10

Instance Method Summary collapse

Constructor Details

#initializeTrieTree

Returns a new instance of TrieTree.



20
21
22
23
24
25
# File 'lib/trie_suggest.rb', line 20

def initialize
  @root = TrieNode.new('')
  @romaji_dictionary = {}
  # you need install natto https://github.com/buruzaemon/natto/
  @nm = Natto::MeCab.new
end

Instance Method Details

#add_word(keyword, score) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/trie_suggest.rb', line 37

def add_word(keyword, score)
  return if keyword.blank?
  return if score < 10
  keyword = normalize(keyword)
  return if keyword.to_s.empty?
  romaji = to_romaji(keyword)
  if @romaji_dictionary[romaji].nil? || @romaji_dictionary[romaji][1] < score
    @romaji_dictionary[romaji] = [keyword, score]
  end

  romaji += '+'
  current_node = @root
  for i in 0..romaji.size - 1
    char = romaji.slice(0, i + 1)
    idx = romaji[i].ord
    current_node.next[idx] = TrieNode.new(char) if current_node.next[idx].nil?
    current_node.next[idx].score += score
    current_node = current_node.next[idx]
  end
end

#levenshtein(word1, word2) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/trie_suggest.rb', line 119

def levenshtein(word1, word2)
  columns = word1.size + 1
  rows = word2.size + 1
  current_row = [0]
  for column in 1..columns
    current_row << current_row[column - 1] + 1
  end

  for row in 1..rows
    previous_row = current_row
    current_row = [ previous_row[0] + 1 ]

    for column in 1..columns
      insert_cost = current_row[column - 1] + 1
      delete_cost = previous_row[column] + 1
      if word1[column - 1] != word2[row - 1]
        replace_cost = previous_row[ column - 1 ] + 1
      else
        replace_cost = previous_row[ column - 1 ]
      end
      current_row << [insert_cost, delete_cost, replace_cost].min
    end
  end
  return current_row[-1]
end

#match(keyword) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/trie_suggest.rb', line 90

def match(keyword)
  return '' if keyword.blank? || keyword == '+'
  keyword = normalize(keyword)
  romaji = to_romaji(keyword)

  current_node = @root
  for i in 0..romaji.size - 1
    idx = romaji[i].ord
    return '' if current_node.next[idx].nil?
    current_node = current_node.next[idx]
  end
  return '' if current_node.next['+'.ord].nil?
  @romaji_dictionary[current_node.val][0]
end

#spellcheck(keyword, max_cost = 3) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/trie_suggest.rb', line 105

def spellcheck(keyword, max_cost = 3)
  return [] if keyword.blank? || keyword == '+'
  keyword += '+'
  current_node = @root

  current_row = []
  for i in 0..keyword.size
    current_row[i] = i
  end
  res = []
  # recursively search each branch of the trie
  current_node.next.map { |node| search_recursive(node[1], node[1].val[-1], keyword, current_row, res, max_cost) }
end

#suggest(keyword) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/trie_suggest.rb', line 58

def suggest(keyword)
  return [] if keyword.blank? || keyword == '+'
  res = []
  keyword = normalize(keyword)
  romaji = to_romaji(keyword)

  current_node = @root
  # search keyword olog(keyword.length)
  for i in 0..romaji.size - 1
    idx = romaji[i].ord
    return res if current_node.next[idx].nil?
    current_node = current_node.next[idx]
  end
  # Traversal node
  next_level = current_node.next.sort_by { |r| r[1].score }.reverse
  until next_level.empty?
    node = next_level.shift
    if node[1].val.last == '+'
      next if @romaji_dictionary[node[1].val.chop].nil?
      sufrace = @romaji_dictionary[node[1].val.chop][0]
      res << [sufrace, node[1].score]
      return res if res.size > 9
    else
      next if node[1].next.empty?
      next_level << node[1].next.sort_by { |r| r[1].score }.reverse[0]
      next_level = next_level.sort_by { |r| r[1].score }.reverse
      next_level = next_level[0..9]
    end
  end
  res
end