Class: SuffixTree::Base
- Inherits:
-
Object
- Object
- SuffixTree::Base
- Defined in:
- lib/suffix_tree/base.rb
Overview
Constant Summary collapse
- UNIQ_IDENTIFIERS =
((0...97).map(&:chr) + (135...200).map(&:chr)).freeze
Instance Attribute Summary collapse
-
#active ⇒ Object
Returns the value of attribute active.
-
#end ⇒ Object
Returns the value of attribute end.
-
#input ⇒ Object
Returns the value of attribute input.
-
#l_map ⇒ Object
Returns the value of attribute l_map.
-
#remaining ⇒ Object
Returns the value of attribute remaining.
-
#root ⇒ Object
Returns the value of attribute root.
-
#uniq_word_map ⇒ Object
Returns the value of attribute uniq_word_map.
-
#words ⇒ Object
Returns the value of attribute words.
Instance Method Summary collapse
- #base_case ⇒ Object
- #build ⇒ Object
- #diff(node) ⇒ Object
-
#initialize(words) ⇒ Base
constructor
A new instance of Base.
- #longest_common_substring(k = words.length) ⇒ Object
- #next_char(i) ⇒ Object
- #select_node(index) ⇒ Object
- #selected_node ⇒ Object
- #set_index_dfs(node, val, size) ⇒ Object
- #set_lvalues_dfs(node, str = '') ⇒ Object
- #start_phase(index) ⇒ Object
- #walk_down(index) ⇒ Object
Constructor Details
#initialize(words) ⇒ Base
Returns a new instance of Base.
10 11 12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/suffix_tree/base.rb', line 10 def initialize(words) @words = words @input = '' @l_map = {} @uniq_word_map = {} words.each_with_index do |word, index| id = UNIQ_IDENTIFIERS[index] @uniq_word_map[id] = index @input += "#{word}#{id}" end @input = @input.split('') @remaining = 0 end |
Instance Attribute Details
#active ⇒ Object
Returns the value of attribute active.
6 7 8 |
# File 'lib/suffix_tree/base.rb', line 6 def active @active end |
#end ⇒ Object
Returns the value of attribute end.
6 7 8 |
# File 'lib/suffix_tree/base.rb', line 6 def end @end end |
#input ⇒ Object
Returns the value of attribute input.
6 7 8 |
# File 'lib/suffix_tree/base.rb', line 6 def input @input end |
#l_map ⇒ Object
Returns the value of attribute l_map.
6 7 8 |
# File 'lib/suffix_tree/base.rb', line 6 def l_map @l_map end |
#remaining ⇒ Object
Returns the value of attribute remaining.
6 7 8 |
# File 'lib/suffix_tree/base.rb', line 6 def remaining @remaining end |
#root ⇒ Object
Returns the value of attribute root.
6 7 8 |
# File 'lib/suffix_tree/base.rb', line 6 def root @root end |
#uniq_word_map ⇒ Object
Returns the value of attribute uniq_word_map.
6 7 8 |
# File 'lib/suffix_tree/base.rb', line 6 def uniq_word_map @uniq_word_map end |
#words ⇒ Object
Returns the value of attribute words.
6 7 8 |
# File 'lib/suffix_tree/base.rb', line 6 def words @words end |
Instance Method Details
#base_case ⇒ Object
208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
# File 'lib/suffix_tree/base.rb', line 208 def base_case max_length = -1 answer = '' words.each do |word| if max_length < word.length max_length = word.length answer = word elsif max_length == word.length answer = [answer, word].min end end answer end |
#build ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/suffix_tree/base.rb', line 24 def build self.root = Node.new(1, End.new(0), words.count) root.index = -1 self.active = ActivePoint.new(root) self.end = End.new(-1) input.each_with_index do |_elm, index| start_phase(index) end set_index_dfs(root, 0, input.length) set_lvalues_dfs(root) end |
#diff(node) ⇒ Object
174 175 176 |
# File 'lib/suffix_tree/base.rb', line 174 def diff(node) node.end.end - node.start end |
#longest_common_substring(k = words.length) ⇒ Object
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
# File 'lib/suffix_tree/base.rb', line 186 def longest_common_substring(k = words.length) raise 'Input has to be integer' unless k.is_a? Integer raise 'Invalid Input' if k <= 0 return base_case if k == 1 max_length = -1 answer = '' l_map.each do |key, v| next if key < k if v.length > max_length max_length = v.length answer = v elsif v.length == max_length answer = [v, answer].min end end answer end |
#next_char(i) ⇒ Object
159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/suffix_tree/base.rb', line 159 def next_char(i) node = selected_node return input[active.node.child[input[active.edge]].start + active.length] if diff(node) >= active.length if diff(node) + 1 == active.length return input[i] if node.child[input[i]] else active.node = node active.length = active.length - diff(node) - 1 active.edge = active.edge + diff(node) + 1 return next_char(i) end raise 'End Of Path Reached' end |
#select_node(index) ⇒ Object
182 183 184 |
# File 'lib/suffix_tree/base.rb', line 182 def select_node(index) active.node.child[input[index]] end |
#selected_node ⇒ Object
178 179 180 |
# File 'lib/suffix_tree/base.rb', line 178 def selected_node active.node.child[input[active.edge]] end |
#set_index_dfs(node, val, size) ⇒ Object
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/suffix_tree/base.rb', line 58 def set_index_dfs(node, val, size) return unless node val += node.end.end - node.start + 1 if node.index != -1 node.index = size - val c_index = node.index c_index += 1 until uniq_word_map[input[c_index]] node.c_values[uniq_word_map[input[c_index]]] = 1 return end nums = [node.c_values] node.child.each do |_key, child| set_index_dfs(child, val, size) nums << child.c_values end if node != root node.c_values = nums.transpose.map(&:sum).map { |e| e.positive? ? 1 : 0 } end node.c_value = node.c_values.sum end |
#set_lvalues_dfs(node, str = '') ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/suffix_tree/base.rb', line 37 def set_lvalues_dfs(node, str = '') return unless node return if node.index != -1 str += input[node.start..node.end.end].join('') if l_map[node.c_value] if l_map[node.c_value].length < str.length l_map[node.c_value] = str elsif l_map[node.c_value].length == str.length l_map[node.c_value] = [str, l_map[node.c_value]].min end else l_map[node.c_value] = str end node.child.each do |_key, child| set_lvalues_dfs(child, str) end end |
#start_phase(index) ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/suffix_tree/base.rb', line 81 def start_phase(index) last_created_internal_node = nil self.end.end += 1 self.remaining += 1 while remaining.positive? if active.length.zero? if select_node(index) active.edge = select_node(index).start active.length += 1 break else root.child[input[index]] = Node.new(index, self.end, words.count) self.remaining -= 1 end else begin char = next_char(index) if char == input[index] last_created_internal_node.suffix_link = selected_node if last_created_internal_node walk_down(index) break else node = selected_node temp_start = node.start node.start = node.start + active.length new_internal_node = Node.new(temp_start, End.new(temp_start + active.length - 1), words.count) new_leaf_node = Node.new(index, self.end, words.count) new_internal_node.child[input[new_internal_node.start + active.length]] = node new_internal_node.child[input[index]] = new_leaf_node new_internal_node.index = -1 active.node.child[input[new_internal_node.start]] = new_internal_node last_created_internal_node.suffix_link = new_internal_node if last_created_internal_node last_created_internal_node = new_internal_node new_internal_node.suffix_link = root if active.node != root active.node = active.node.suffix_link else active.edge = active.edge + 1 active.length -= 1 end self.remaining -= 1 end rescue StandardError node = selected_node node.child[input[index]] = Node.new(index, self.end, words.count) last_created_internal_node.suffix_link = node if last_created_internal_node last_created_internal_node = node if active.node != root active.node = active.node.suffix_link else active.edge = active.edge + 1 active.length -= 1 end self.remaining -= 1 end end end end |
#walk_down(index) ⇒ Object
147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/suffix_tree/base.rb', line 147 def walk_down(index) node = selected_node if diff(node) < active.length active.node = node active.length = active.length - diff(node) active.edge = node.child[input[index]].start else active.length += 1 end end |