Class: SuffixTree::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/suffix_tree/base.rb

Overview

Constant Summary collapse

UNIQ_IDENTIFIERS =
((0...97).map(&:chr) + (135...200).map(&:chr)).freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(words) ⇒ Base

Returns a new instance of Base.



10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/suffix_tree/base.rb', line 10

def initialize(words)
  @words = words
  @input = ''
  @l_map = {}
  @uniq_word_map = {}
  words.each_with_index do |word, index|
    id = UNIQ_IDENTIFIERS[index]
    @uniq_word_map[id] = index
    @input += "#{word}#{id}"
  end
  @input = @input.split('')
  @remaining = 0
end

Instance Attribute Details

#activeObject

Returns the value of attribute active.



6
7
8
# File 'lib/suffix_tree/base.rb', line 6

def active
  @active
end

#endObject

Returns the value of attribute end.



6
7
8
# File 'lib/suffix_tree/base.rb', line 6

def end
  @end
end

#inputObject

Returns the value of attribute input.



6
7
8
# File 'lib/suffix_tree/base.rb', line 6

def input
  @input
end

#l_mapObject

Returns the value of attribute l_map.



6
7
8
# File 'lib/suffix_tree/base.rb', line 6

def l_map
  @l_map
end

#remainingObject

Returns the value of attribute remaining.



6
7
8
# File 'lib/suffix_tree/base.rb', line 6

def remaining
  @remaining
end

#rootObject

Returns the value of attribute root.



6
7
8
# File 'lib/suffix_tree/base.rb', line 6

def root
  @root
end

#uniq_word_mapObject

Returns the value of attribute uniq_word_map.



6
7
8
# File 'lib/suffix_tree/base.rb', line 6

def uniq_word_map
  @uniq_word_map
end

#wordsObject

Returns the value of attribute words.



6
7
8
# File 'lib/suffix_tree/base.rb', line 6

def words
  @words
end

Instance Method Details

#base_caseObject



208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/suffix_tree/base.rb', line 208

def base_case
  max_length = -1
  answer = ''

  words.each do |word|
    if max_length < word.length
      max_length = word.length
      answer = word
    elsif max_length == word.length
      answer = [answer, word].min
    end
  end
  answer
end

#buildObject



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/suffix_tree/base.rb', line 24

def build
  self.root = Node.new(1, End.new(0), words.count)
  root.index = -1
  self.active = ActivePoint.new(root)
  self.end = End.new(-1)
  input.each_with_index do |_elm, index|
    start_phase(index)
  end

  set_index_dfs(root, 0, input.length)
  set_lvalues_dfs(root)
end

#diff(node) ⇒ Object



174
175
176
# File 'lib/suffix_tree/base.rb', line 174

def diff(node)
  node.end.end - node.start
end

#longest_common_substring(k = words.length) ⇒ Object



186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/suffix_tree/base.rb', line 186

def longest_common_substring(k = words.length)
  raise 'Input has to be integer' unless k.is_a? Integer
  raise 'Invalid Input' if k <= 0

  return base_case if k == 1

  max_length = -1
  answer = ''

  l_map.each do |key, v|
    next if key < k

    if v.length > max_length
      max_length = v.length
      answer = v
    elsif v.length == max_length
      answer = [v, answer].min
    end
  end
  answer
end

#next_char(i) ⇒ Object



159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/suffix_tree/base.rb', line 159

def next_char(i)
  node = selected_node
  return input[active.node.child[input[active.edge]].start + active.length] if diff(node) >= active.length

  if diff(node) + 1 == active.length
    return input[i] if node.child[input[i]]
  else
    active.node = node
    active.length = active.length - diff(node) - 1
    active.edge = active.edge + diff(node) + 1
    return next_char(i)
  end
  raise 'End Of Path Reached'
end

#select_node(index) ⇒ Object



182
183
184
# File 'lib/suffix_tree/base.rb', line 182

def select_node(index)
  active.node.child[input[index]]
end

#selected_nodeObject



178
179
180
# File 'lib/suffix_tree/base.rb', line 178

def selected_node
  active.node.child[input[active.edge]]
end

#set_index_dfs(node, val, size) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/suffix_tree/base.rb', line 58

def set_index_dfs(node, val, size)
  return unless node

  val += node.end.end - node.start + 1
  if node.index != -1
    node.index = size - val
    c_index = node.index

    c_index += 1 until uniq_word_map[input[c_index]]
    node.c_values[uniq_word_map[input[c_index]]] = 1
    return
  end
  nums = [node.c_values]
  node.child.each do |_key, child|
    set_index_dfs(child, val, size)
    nums << child.c_values
  end
  if node != root
    node.c_values = nums.transpose.map(&:sum).map { |e| e.positive? ? 1 : 0 }
  end
  node.c_value = node.c_values.sum
end

#set_lvalues_dfs(node, str = '') ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/suffix_tree/base.rb', line 37

def set_lvalues_dfs(node, str = '')
  return unless node

  return if node.index != -1

  str += input[node.start..node.end.end].join('')
  if l_map[node.c_value]
    if l_map[node.c_value].length < str.length
      l_map[node.c_value] = str
    elsif l_map[node.c_value].length == str.length
      l_map[node.c_value] = [str, l_map[node.c_value]].min
    end
  else
    l_map[node.c_value] = str
  end

  node.child.each do |_key, child|
    set_lvalues_dfs(child, str)
  end
end

#start_phase(index) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/suffix_tree/base.rb', line 81

def start_phase(index)
  last_created_internal_node = nil
  self.end.end += 1
  self.remaining += 1

  while remaining.positive?
    if active.length.zero?
      if select_node(index)
        active.edge = select_node(index).start
        active.length += 1
        break
      else
        root.child[input[index]] = Node.new(index, self.end, words.count)
        self.remaining -= 1
      end
    else
      begin
        char = next_char(index)
        if char == input[index]
          last_created_internal_node.suffix_link = selected_node if last_created_internal_node
          walk_down(index)
          break
        else
          node = selected_node
          temp_start = node.start
          node.start = node.start + active.length
          new_internal_node = Node.new(temp_start, End.new(temp_start + active.length - 1), words.count)

          new_leaf_node = Node.new(index, self.end, words.count)

          new_internal_node.child[input[new_internal_node.start + active.length]] = node
          new_internal_node.child[input[index]] = new_leaf_node
          new_internal_node.index = -1
          active.node.child[input[new_internal_node.start]] = new_internal_node

          last_created_internal_node.suffix_link = new_internal_node if last_created_internal_node

          last_created_internal_node = new_internal_node
          new_internal_node.suffix_link = root

          if active.node != root
            active.node = active.node.suffix_link
          else
            active.edge = active.edge + 1
            active.length -= 1
          end
          self.remaining -= 1
        end
      rescue StandardError
        node = selected_node
        node.child[input[index]] = Node.new(index, self.end, words.count)
        last_created_internal_node.suffix_link = node if last_created_internal_node
        last_created_internal_node = node

        if active.node != root
          active.node = active.node.suffix_link
        else
          active.edge = active.edge + 1
          active.length -= 1
        end
        self.remaining -= 1
      end
    end
  end
end

#walk_down(index) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
# File 'lib/suffix_tree/base.rb', line 147

def walk_down(index)
  node = selected_node

  if diff(node) < active.length
    active.node = node
    active.length = active.length - diff(node)
    active.edge = node.child[input[index]].start
  else
    active.length += 1
  end
end