Class: DistributedTrie::Trie

Inherits:
Object
  • Object
show all
Defined in:
lib/distributedtrie/trie.rb

Instance Method Summary collapse

Constructor Details

#initialize(kvsif, prefixString) ⇒ Trie

kvsif … Please implement like DistributedTrie::KvsIF class and specify instance of it.



41
42
43
44
45
46
# File 'lib/distributedtrie/trie.rb', line 41

def initialize( kvsif, prefixString )
  @kvsif        = kvsif
  @req          = Hash.new
  @prefixString = prefixString
  @key_hash     = Hash.new
end

Instance Method Details

#_createTree(key) ⇒ Object



193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/distributedtrie/trie.rb', line 193

def _createTree( key )
  h = Hash.new
  str = ''
  key.split( // ).each { |c|
    val = if str.size == (key.size-1)
            c + '$'
          else
            c
          end
    h [ str ] = val
    str += c
  }

  h.keys.each{ |key|
    if not @key_hash.has_key?( key )
      @key_hash[ key ]  = ''
    end
    @key_hash[ key ] += ' ' + h[ key ]
    @key_hash[key] = _mergeIndex( @key_hash[key] )
  }
  @key_hash
end

#_getInternal(type = :work) ⇒ Object



216
217
218
# File 'lib/distributedtrie/trie.rb', line 216

def _getInternal( type = :work )
  @key_hash
end

#_getNextLetters(node) ⇒ Object



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/distributedtrie/trie.rb', line 152

def _getNextLetters( node )
  str = @kvsif.get( @prefixString + node )
  if str
    term    = []
    nonTerm = []
    str.split( /[ ]+/ ).each { |x|
      case x.size
      when 1
        nonTerm << x
      when 2
        term    << x[0...1]
      end
    }
    [ term, nonTerm ]
  else
    [ [], [] ]
  end
end

#_mergeIndex(indexStr) ⇒ Object



171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/distributedtrie/trie.rb', line 171

def _mergeIndex( indexStr )
  # "a$ a" => "a$"    # merge into terminal
  # " a$"  => "a$"    # strip spaces
  # "a$ b" => "a$ b"  # alredy merged

  h = Hash.new
  term    = Array.new
  nonTerm = Array.new
  indexStr.split( /[ ]+/ ).each {|entry|
    case entry.size
    when 1
      nonTerm << entry
    when 2
      term    << entry[0...1]
    else
    end
  }
  arr  = term.uniq.map{ |x| x + '$' }
  arr += nonTerm.uniq.reject { |x| term.include?( x ) }
  arr.join( ' ' )
end

#_searchWith(key, &block) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/distributedtrie/trie.rb', line 95

def _searchWith( key, &block )
  result = []
  (term, nonTerm) = _getNextLetters( key )
  term.each { |x|
    arg = key + x
    #pp [ "_check(1)", arg ]
    if block.call( arg, true )
      #pp [ '_match(1)', key, x ]
      result += _searchWith( key + x, &block )
      result << arg
    elsif block.call( arg, false )
      #pp [ '_match(2)', key, x ]
      result += _searchWith( key + x, &block )
    end
  }
  nonTerm.each { |x|
    arg = key + x
    #pp [ "_check(3)", arg ]
    if block.call( arg, false )
      #pp [ '_match(3)', key, x ]
      result += _searchWith( key + x, &block )
    end
  }
  result
end

#addKey!(key) ⇒ Object



48
49
50
# File 'lib/distributedtrie/trie.rb', line 48

def addKey!( key )
  _createTree( key )
end

#cancelObject



63
64
65
# File 'lib/distributedtrie/trie.rb', line 63

def cancel()
  @key_hash    = Hash.new
end

#commit!Object



55
56
57
58
59
60
61
# File 'lib/distributedtrie/trie.rb', line 55

def commit!()
  @key_hash.each_key { |key|
    cur = @kvsif.get( @prefixString + key, "" )
    @kvsif.put!( @prefixString + key, _mergeIndex( cur + " " + @key_hash[ key ] ))
  }
  @key_hash    = Hash.new
end

#commonPrefixSearch(key) ⇒ Object



80
81
82
83
# File 'lib/distributedtrie/trie.rb', line 80

def commonPrefixSearch( key )
  result =  exactMatchSearch( key )
  result += listChilds( key )
end

#deleteKey!(key) ⇒ Object



52
53
# File 'lib/distributedtrie/trie.rb', line 52

def deleteKey!( key )
end

#exactMatchSearch(key) ⇒ Object



85
86
87
88
89
90
91
92
93
# File 'lib/distributedtrie/trie.rb', line 85

def exactMatchSearch( key )
  (term, nonTerm) = _getNextLetters( key[0...(key.size-1)] )
  #pp [ "exactMatchSearch", key, key[0...(key.size-1)], term, nonTerm ]
  if term.include?( key[-1] )
    [key]
  else
    []
  end
end

#fuzzySearch(searchWord, threshold = 0.90) ⇒ Object

return: [ [distance, keyword], [distance, keyword], … ]



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/distributedtrie/trie.rb', line 134

def fuzzySearch( searchWord, threshold = 0.90 )
  jarow = FuzzyStringMatch::JaroWinkler.create( )
  result = search( '' ) { |x,termFlag|
    _word = searchWord
    if not termFlag and (x.size < searchWord.size)
      _word = searchWord[0...x.size]
      (searchWord.size-x.size).times { |i|
        _word += ' '
        x     += ' ' 
        # pp [ "non terminal: ", i, x, _word ]
      }
    end
    result = jarow.getDistance( x, _word )
    threshold <= result
  }
  result.map { |k| [ jarow.getDistance( searchWord, k ), k ] }.sort_by {|item| 1.0 - item[0]}
end

#listChilds(key) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/distributedtrie/trie.rb', line 67

def listChilds( key )
  result = []
  (term, nonTerm) = _getNextLetters( key )
  #pp [ "searchChilds", key, term, nonTerm ]
  term.each { |x|
    result << key + x
  }
  (term + nonTerm).each { |x|
    result += listChilds( key + x )
  }
  result
end

#rangeSearch(from, to) ⇒ Object



125
126
127
128
129
130
131
# File 'lib/distributedtrie/trie.rb', line 125

def rangeSearch( from, to )
  search( '' ) { |x,termFlag|
    _from = from[0...x.size]
    _to   = to  [0...x.size]
    ( _from <= x ) && ( x <= _to  )
  }
end

#search(entryNode, &block) ⇒ Object



121
122
123
# File 'lib/distributedtrie/trie.rb', line 121

def search( entryNode, &block )
  _searchWith( entryNode, &block )
end