Class: SuffixTree

Inherits:
Object
  • Object
show all
Defined in:
lib/suffix_tree.rb

Overview

Builds a suffix tree from one or more DataSource instances

Constant Summary collapse

NO_SUFFIX_OFFSET =
-1

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(terminalValue = nil, configuration = nil, persister = nil) ⇒ SuffixTree

Returns a new instance of SuffixTree.



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/suffix_tree.rb', line 24

def initialize(terminalValue = nil, configuration = nil, persister = nil)
  @nextNodeId = 0
  @nodeFactory = NodeFactory.new(nil, persister)
  @nodeFactory.setConfiguration(configuration) if (configuration != nil)
  @root = @nodeFactory.newRoot()
  @rootDataSource = nil
  @location = Location.new(@root)
  @startOffset = 0
  @suffixOffset = 0
  @suffixLinker = SuffixLinker.new
  @terminalValue = terminalValue
end

Instance Attribute Details

#locationObject (readonly)

where we are in the implicit tree building process



14
15
16
# File 'lib/suffix_tree.rb', line 14

def location
  @location
end

#nodeFactoryObject (readonly)

Returns the value of attribute nodeFactory.



16
17
18
# File 'lib/suffix_tree.rb', line 16

def nodeFactory
  @nodeFactory
end

#rootObject (readonly)

the root of the tree, and the terminal value (for making implicit trees explicit)



19
20
21
# File 'lib/suffix_tree.rb', line 19

def root
  @root
end

#rootDataSourceObject (readonly)

first data source we use



8
9
10
# File 'lib/suffix_tree.rb', line 8

def rootDataSource
  @rootDataSource
end

#startOffsetObject (readonly)

when there are a sequence of data sources, treat them as one long one, this is where next source starts



11
12
13
# File 'lib/suffix_tree.rb', line 11

def startOffset
  @startOffset
end

#suffixLinkerObject (readonly)

keep track of which nodes need suffix links



22
23
24
# File 'lib/suffix_tree.rb', line 22

def suffixLinker
  @suffixLinker
end

#terminalValueObject (readonly)

the root of the tree, and the terminal value (for making implicit trees explicit)



19
20
21
# File 'lib/suffix_tree.rb', line 19

def terminalValue
  @terminalValue
end

Instance Method Details

#addDataSource(dataSource) ⇒ Object

Add all values in a given dataSource



50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/suffix_tree.rb', line 50

def addDataSource(dataSource)
  @suffixOffset = 0
  self.setDataSource(dataSource)
  dataSource.each_with_index(@startOffset) do |value, offset|
    self.addValue(value, offset)
  end
  if (@terminalValue != nil) then
    @lastOffsetAdded += 1
    self.addValue(@terminalValue, @lastOffsetAdded)
  end
  @startOffset = @lastOffsetAdded + 1
end

#addValue(value, offset) ⇒ Object

Adding one value at a time, rootDataSource must be set for this to work



66
67
68
69
70
71
# File 'lib/suffix_tree.rb', line 66

def addValue(value, offset)
  while (extend(value, offset)) do
    @suffixLinker.update(@location)
  end
  @lastOffsetAdded = offset
end

#extend(value, offset) ⇒ Object

Extend a single suffix at the current location, returns true if there is another

suffix to extend.

Handles these cases:

   On a node:
      if there is a child starting with the extension value, traverse down that one value, return FALSE
      if no child has the extension value, add a leaf,
          if we are at root, return FALSE,
          otherwise traverse to the next suffix and return TRUE

   On an edge:
      if next character has the value, traverse past it, return FALSE
      if next character is not the value, split edge at that location, locate at the new node, and return TRUE


98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/suffix_tree.rb', line 98

def extend(value,offset)
  if (@location.onNode)
    if (@location.node.children.has_key?(value)) then
      @location.traverseDownChildValue(value)
      return false  # rule 3
    else
      @nodeFactory.addLeaf(@location.node, value, offset)
      return @location.traverseToNextSuffix(@rootDataSource)  # rule 1, traverse returns false when at root
    end
  elsif (@rootDataSource.valueAt(@location.incomingEdgeOffset) == value) then
    @location.traverseDownEdgeValue()
    return false   # found value on edge, rule 3
  else
    newNode = @nodeFactory.splitEdgeAt(@location.node, @location.incomingEdgeOffset)
    @suffixLinker.nodeNeedingSuffixLink(newNode)
    @location.jumpToNode(newNode)
    return true   # rule 2
  end
end

#finishObject

Finish building the tree by adding a value that is not part of the data source



76
77
78
79
80
# File 'lib/suffix_tree.rb', line 76

def finish()
  if (@rootDataSource.has_terminator?) then
    self.addValue(@rootDataSource.terminator, @startOffset)
  end
end

#setDataSource(dataSource) ⇒ Object

Set the data source, but do not add any values from the data source



40
41
42
43
44
45
# File 'lib/suffix_tree.rb', line 40

def setDataSource(dataSource)
  if (@rootDataSource == nil) then
    @rootDataSource = dataSource
  end
  @nodeFactory.extendDataSource(dataSource, @startOffset)
end