Class: Scripsi::SortedSuffixIndexer

Inherits:
Object
  • Object
show all
Defined in:
lib/scripsi.rb

Defined Under Namespace

Classes: Documents

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(id = nil, check = true) ⇒ SortedSuffixIndexer



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/scripsi.rb', line 57

# Sets up an indexer and derives the Redis keys it works with.
#
# With +check+ enabled the id is reserved in the "scripsi:used" hash: a
# caller-supplied id that is already present there is rejected, and when no
# id is supplied a fresh one is taken from the "scripsi:next_id" counter.
# With +check+ disabled the id is stored exactly as given and Redis is not
# touched.
#
# @param id [#to_s, nil] desired indexer id, or nil to have one generated
# @param check [Boolean] whether to verify and reserve the id in Redis
# @raise [RuntimeError] if +check+ is set and the id is already registered
def initialize(id=nil,check=true)
  if check
    already_used = id && Scripsi.redis.hexists("scripsi:used", id.to_s)
    raise "id '#{id}' in use" if already_used

    # Supplied ids are normalised to strings; generated ids are kept as
    # returned by INCR.
    @id = (id && id.to_s) || Scripsi.redis.incr("scripsi:next_id")
    Scripsi.redis.hset("scripsi:used", @id, "ssi")
  else
    @id = id
  end

  @search_length = 30
  @documents_key = "scripsi:documents:#{@id}"
  @document_key = "scripsi:document:#{@id}"
  @index_key = "scripsi:index:#{@id}"
end

Class Method Details

.build(id) ⇒ Object

Creates an indexer with the given id WITHOUT CHECKING whether that id is already in use. This method is used internally; calling it yourself may result in deleting an indexer unless you know the id you’re using is valid.



132
133
134
# File 'lib/scripsi.rb', line 132

# Instantiates an indexer for +id+ with the constructor's check turned off,
# so the id is used as given without being verified.
#
# @param id [Object] id of the indexer to build
# @return [Object] whatever +new+ returns for (id, false)
def self.build(id)
  check = false
  new(id, check)
end

Instance Method Details

#documents ⇒ Object

Returns the Documents object for this indexer, through which the document with a given id can be retrieved.



110
111
112
# File 'lib/scripsi.rb', line 110

# Builds a Documents object from this indexer's document-string key and
# documents-hash key.
#
# @return [Documents]
def documents
  keys = [@document_key, @documents_key]
  Documents.new(*keys)
end

#index(id, str) ⇒ Boolean

adds a document to this indexer



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/scripsi.rb', line 78

# Adds a document to this indexer.
#
# Each pair produced by +suffixes(str)+ is scored with Scripsi.score, and
# score number +j+ is written to the sorted set "<index_key>:<j>" with the
# pair's second element plus the current document-string length as member.
# The text followed by "\0<id>\0" is then appended to the document string,
# and the document's [first, last] offsets are stored (Marshal-dumped) in
# the documents hash under +id+.
#
# @param id [#to_s] document identifier; converted to a string
# @param str [String] text of the document
# @return [Boolean] false when a document with this id is already indexed
#   (nothing is written), true once the document has been stored
def index(id,str)
  id = id.to_s
  return false if Scripsi.redis.hexists @documents_key, id

  # The new document starts where the shared document string currently ends.
  offset = Scripsi.redis.strlen @document_key
  sfxs = suffixes(str).sort_by { |suffix, _| suffix }
  sfxs.each do |suffix, i|
    Scripsi.score(suffix).each_with_index do |scr, j|
      Scripsi.redis.zadd "#{@index_key}:#{j}", scr, i + offset
    end
  end

  doc = str + "\0#{id}\0"
  Scripsi.redis.append @document_key, doc

  # NOTE(review): str.size counts characters whereas STRLEN counts bytes;
  # these presumably only agree for single-byte text - confirm.
  endpoints = Marshal.dump([offset, offset + str.size - 1])
  Scripsi.redis.hset @documents_key, id, endpoints

  # hset's raw return value depends on the Redis client (it is not always
  # true/false), so report success explicitly to honour the Boolean contract.
  true
end

#inspect ⇒ Object



136
137
138
# File 'lib/scripsi.rb', line 136

# Short, human-readable representation showing only the indexer's id.
#
# @return [String]
def inspect
  format("#<Scripsi::SortedSuffixIndexer id=%s>", @id)
end

#search(term) ⇒ Object

searches for documents containing the substring term



117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/scripsi.rb', line 117

# Searches for documents containing the substring +term+.
#
# The lower-cased term is scored with Scripsi.score. For score number +i+
# the sorted set "<index_key>:<i>" is queried for members whose score lies
# in a narrow range starting at that score, and the member sets of all
# partitions are intersected. Each surviving member is passed to
# +read_to_id+ and the distinct results are returned.
#
# @param term [String] substring to look for
# @return [Array] distinct +read_to_id+ results; empty when nothing matches
#   or when the term produces no score partitions
def search(term)
  term, length = term.downcase, term.length
  matches = nil
  Scripsi.score(term).each_with_index do |scr, i|
    # 27 is presumably the base used by Scripsi.score - confirm.
    lower, upper = scr.to_s, "#{scr + 1.0 / (27**length)}"
    # A leading "(" makes the upper bound exclusive for ZRANGEBYSCORE. When
    # the increment is too small to change the float, both bounds are equal
    # and the range is left inclusive so it is not empty.
    upper = "(" + upper unless lower == upper
    ids = Set.new(Scripsi.redis.zrangebyscore("#{@index_key}:#{i}", lower, upper))
    matches = matches ? matches & ids : ids
    # An empty intersection can never grow again; skip the remaining queries.
    break if matches.empty?
    length -= Scripsi.partition_size
  end
  # No score partitions (e.g. nothing to score) means nothing can match.
  return [] unless matches

  matches.map { |member| read_to_id(member.to_i) }.uniq
end