Class: CheapSkate::Index

Inherits:
Ferret::Index::Index
  • Object
show all
Defined in:
lib/cheap_skate/index.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}, schema = CheapSkate::Schema.new) ⇒ Index

Returns a new instance of Index.



4
5
6
7
# File 'lib/cheap_skate/index.rb', line 4

# Builds the index and keeps a reference to the schema describing it.
#
# @param opts [Hash] options handed unchanged to the superclass constructor
# @param schema [Object] schema later returned by #schema; a fresh
#   CheapSkate::Schema is built when the argument is omitted
def initialize(opts = {}, schema = CheapSkate::Schema.new)
  # The superclass is initialised first, then the schema is recorded.
  super(opts)
  @schema = schema
end

Instance Attribute Details

#schema ⇒ Object

Returns the value of attribute schema.



3
4
5
# File 'lib/cheap_skate/index.rb', line 3

# Reader for the schema held by this index.
#
# @return [Object] whatever is currently stored in @schema (nil if unset)
def schema; @schema; end

Instance Method Details

#create_document(id = UUID.generate, boost = 1.0) ⇒ Object



220
221
222
223
224
# File 'lib/cheap_skate/index.rb', line 220

# Builds a new CheapSkate::Document that points back at this index.
#
# @param id [Object] document id; defaults to a freshly generated UUID
# @param boost [Float] boost passed to the document constructor
# @return [CheapSkate::Document] the new document, with its index set to self
def create_document(id = UUID.generate, boost = 1.0)
  CheapSkate::Document.new(id, boost).tap { |document| document.index = self }
end

#do_search(query, opts = {}) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/cheap_skate/index.rb', line 57

# Runs +query+ against the index and collects the hits into a ResultSet.
#
# The requested :offset and :limit are echoed onto the result set as given;
# the limit actually sent to the searcher is raised to 1 when a numeric limit
# below 1 is requested.
#
# @param query [Object] query object exposing #query, #filter and
#   #filter_proc, and optionally #facet_fields, #facet_offset, #facet_limit
#   and #facet_queries
# @param opts [Hash] search options passed on to the Ferret searcher; the
#   caller's hash is left untouched
# @return [ResultSet] typed documents (each with a :score entry) plus total
#   and max score; extended with Facet when the query responds to
#   #facet_fields
def do_search(query, opts={})
  # Work on a copy so the caller's options hash is never modified.
  search_opts = opts.dup

  results = ResultSet.new
  results.offset = search_opts[:offset]
  results.limit = search_opts[:limit]
  results.query = query.query.to_s

  # Only clamp numeric limits: a missing limit (nil) or a symbolic one such as
  # :all must not be compared against an Integer.
  requested_limit = search_opts[:limit]
  search_opts[:limit] = 1 if requested_limit.is_a?(Numeric) && requested_limit < 1

  search_opts[:filter] = query.filter if query.filter

  if query.filter_proc
    if query.query.is_a?(Ferret::Search::MatchAllQuery) && query.filter.nil?
      # Unfiltered match-all: facet counts are read from the index terms
      # instead of running the filter proc during the search.
      get_facets_from_index_terms(query)
    else
      search_opts[:filter_proc] = query.filter_proc
    end
  end

  searcher = Ferret::Search::Searcher.new(reader)
  hits = searcher.search(query.query, search_opts)
  results.total = hits.total_hits
  results.max_score = hits.max_score
  hits.hits.each do |hit|
    doc = @schema.typed_document(self[hit.doc])
    doc[:score] = hit.score
    results << doc
  end

  if query.respond_to?(:facet_fields)
    facets = {}
    query.facet_fields.each do |facet, values|
      # Highest counts first, then the requested window of facet values.
      facets[facet] = values.sort { |a, b| b[1] <=> a[1] }[query.facet_offset, query.facet_limit]
    end
    query.facet_fields = facets
    if query.facet_queries
      query.facet_queries.each do |fq|
        # The block is intentionally empty; only search_each's return value is kept.
        fq[:results] = search_each(fq.query, :filter => fq.filter, :limit => 1) { |_id, _score| }
      end
    end
    results.extend(Facet)
    results.add_facets_to_results(query)
  end
  results
end

#get_facets_from_index_terms(query) ⇒ Object



102
103
104
105
106
107
108
109
110
# File 'lib/cheap_skate/index.rb', line 102

# Fills the query's facet buckets straight from the index's term lists.
#
# For every facet field on the query, each (term, count) pair yielded by
# reader.terms(field) is written into query.facet_fields[field]. Fields for
# which the reader returns nothing are left untouched.
#
# @param query [Object] object whose #facet_fields maps field => bucket
# @return [Array] the facet field keys that were visited
def get_facets_from_index_terms(query)
  query.facet_fields.keys.each do |facet_name|
    term_list = reader.terms(facet_name)
    if term_list
      term_list.each { |text, frequency| query.facet_fields[facet_name][text] = frequency }
    end
  end
end

#luke(params) ⇒ Object



226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/cheap_skate/index.rb', line 226

# Builds a Luke-style summary of the index: document counts, version
# information and a per-field flag string.
#
# The flag string has one position per property, holding the letter when the
# property is exactly +true+ and "-" otherwise, in this order:
#   I indexed, T tokenized, S stored, M multi-valued, V term vectors,
#   o offsets, p positions, O omit norms, "--", C compressed, "--".
# The two "--" runs are fixed padding.
#
# @param params [Object] accepted for interface compatibility; not used
# @return [LukeResponse] populated response object
def luke(params)
  response = LukeResponse.new
  response.num_docs = reader.num_docs
  response.max_doc = reader.max_doc
  response.version = reader.version
  response.optimized = true # Hard code this -- not sure there's any way to get this from Ferret
  response.current = reader.version == writer.version
  response.has_deletions = reader.has_deletions?
  response.directory = options[:path]
  response.last_modified = Time.now.xmlschema # I don't see this in Ferret, either

  reader.field_infos.each do |field|
    # Fields the schema does not know about are reported as multi-valued.
    # NOTE(review): the schema's field type is available here but is not part
    # of the response; only the flag string is reported per field.
    multivalued = schema.fields[field.name] ? schema.multi_valued?(field.name) : true

    leading_flags = [
      [field.indexed?,           "I"],
      [field.tokenized?,         "T"],
      [field.stored?,            "S"],
      [multivalued,              "M"],
      [field.store_term_vector?, "V"],
      [field.store_offsets?,     "o"],
      [field.store_positions?,   "p"],
      [field.omit_norms?,        "O"]
    ]
    # A flag only counts when it is exactly true, not merely truthy.
    schema_string = leading_flags.map { |set, letter| set == true ? letter : "-" }.join
    schema_string << "--"
    schema_string << (field.compressed? == true ? "C" : "-")
    schema_string << "--"

    response.fields[field.name] = {:schema=>schema_string}
  end
  response
end

#parse_dismax_query(params) ⇒ Object



173
174
175
# File 'lib/cheap_skate/index.rb', line 173

# Parses a dismax request. No dismax-specific handling exists here: the
# request parameters are handed as-is to the standard query parser.
#
# @param params [Hash] request parameters
# @return [Object] whatever parse_standard_query returns for +params+
def parse_dismax_query(params)
  parse_standard_query(params)
end

#parse_filtered_query(params) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/cheap_skate/index.rb', line 142

# Turns the request's "fq" parameter(s) into a single Ferret query filter in
# which every filter query is a required clause.
#
# Each filter query is first run through a strict, field-validating parser.
# When that yields nothing usable, the raw "field:value" text is used as an
# exact term query instead (surrounding double quotes removed from the value).
#
# @param params [Hash] request parameters; reads "q.op", "df" and "fq"
# @return [Ferret::Search::QueryFilter, nil] the combined filter, or nil when
#   no filter clause was produced
def parse_filtered_query(params)
  or_and = if params["q.op"] && !params["q.op"].empty?
    [*params["q.op"]].first
  else
    schema.default_operator
  end

  dflt_field = params["df"] ? [*params["df"]].first : schema.default_field

  strict_parser = Ferret::QueryParser.new(:default_field=>dflt_field, :fields=>reader.tokenized_fields, :validate_fields=>true, :or_default=>(or_and=="OR"), :handle_parse_errors=>false)
  bool = Ferret::Search::BooleanQuery.new
  [*params['fq']].each do |fq|
    next if fq.nil? || fq.empty?

    # Assign first, test second: written as `filtq = parse(fq) && ...` the
    # `&&` binds tighter than `=`, so the parsed query would never be used.
    filtq = strict_parser.parse(fq)
    if filtq && !filtq.to_s.empty?
      bool.add_query(filtq, :must)
    else
      # Split on the first colon only so values containing ":" stay intact.
      idx, term = fq.split(":", 2)
      if term.nil?
        # No field prefix at all: treat the text as a term in the default field.
        next unless dflt_field
        idx, term = dflt_field, idx
      end
      term = term.sub(/\A"/, '').sub(/"\z/, '')
      bool.add_query(Ferret::Search::TermQuery.new(idx.to_sym, term), :must)
    end
  end

  return nil if bool.to_s.empty?
  Ferret::Search::QueryFilter.new(bool)
end

#parse_morelikethis_query(params) ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/cheap_skate/index.rb', line 177

# Builds a "more like this" query from the first document matching the
# request's "q" parameter.
#
# The seed document is the single hit returned for "q" ("*:*" when "q" is
# neither a String nor an Array), optionally skipping "mlt.match.offset" hits.
# Every stored field value of the seed becomes a term clause; unless
# "mlt.match.include" is "true", the seed's own :id is excluded.
#
# NOTE(review): when the search yields no hit, the seed stays nil and the
# code below raises NoMethodError — confirm what callers expect in that case.
#
# @param params [Hash] request parameters
# @return [CheapSkate::Query] query whose #query is the re-parsed boolean query
def parse_morelikethis_query(params)
  raw_q = params["q"]
  seed_query =
    if raw_q.instance_of?(Array)
      raw_q.first
    elsif raw_q.instance_of?(String)
      raw_q
    else
      "*:*"
    end

  search_opts = { :limit => 1 }
  offset_param = params['mlt.match.offset']
  search_opts[:offset] = [*offset_param].first.to_i if offset_param

  seed = nil
  self.search_each(seed_query, search_opts) { |doc_id, _score| seed = self[doc_id].load }

  bool = Ferret::Search::BooleanQuery.new
  include_seed = params['mlt.match.include'] && [*params['mlt.match.include']].first == "true"
  unless include_seed
    bool.add_query(Ferret::Search::TermQuery.new(:id, seed[:id]), :must_not)
  end

  seed.each_pair do |field_name, stored|
    values = stored.is_a?(Array) ? stored : [stored]
    values.each do |value|
      clause = Ferret::Search::BooleanQuery.new
      clause.add_query(Ferret::Search::TermQuery.new(field_name, value))
      bool << clause
    end
  end

  query = CheapSkate::Query.new
  # No idea why this is necessary, but Ferret will ignore our boolean NOT otherwise
  reparser = Ferret::QueryParser.new
  query.query = reparser.parse(bool.to_s)
  query
end

#parse_standard_query(params) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/cheap_skate/index.rb', line 113

# Converts standard request parameters into a CheapSkate::Query.
#
# "q.op" and "df" override the schema's default operator and default field.
# "q" may be a String or an Array (first element used); a missing, empty or
# "*:*" query becomes a match-all query. When "fq" is present and non-empty
# the filter is built by parse_filtered_query.
#
# @param params [Hash] request parameters
# @return [CheapSkate::Query] query with #query set and #filter set when
#   filter queries were supplied
def parse_standard_query(params)
  operator_param = params["q.op"]
  or_and = (operator_param && !operator_param.empty?) ? [*operator_param].first : schema.default_operator
  dflt_field = params["df"] ? [*params["df"]].first : schema.default_field

  parser = Ferret::QueryParser.new(
    :default_field => dflt_field,
    :fields => reader.tokenized_fields,
    :or_default => (or_and == "OR")
  )

  query = CheapSkate::Query.new

  raw_q = params["q"]
  q =
    if raw_q.instance_of?(Array)
      raw_q.first
    elsif raw_q.instance_of?(String)
      raw_q
    end

  parseable = q && !q.empty? && q != "*:*"
  query.query = parseable ? parser.parse(q) : Ferret::Search::MatchAllQuery.new

  filter_param = params['fq']
  query.filter = parse_filtered_query(params) if filter_param && !filter_param.empty?
  query
end

#set_dynamic_field(field) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/cheap_skate/index.rb', line 23

# Registers +field+ with the index writer when it is not yet known to the
# index or the schema but matches one of the schema's dynamic field patterns.
#
# Patterns carry a single "*" wildcard at the start ("*_s": the name must end
# with "_s") or at the end ("attr_*": the name must start with "attr_"). The
# first matching pattern, in the order the schema lists them, decides the
# field's index and store options.
#
# @param field [String, Symbol] name of the field about to be used
# @return [Object, nil] result of adding the field, or nil when the field is
#   already known or no pattern matches
def set_dynamic_field(field)
  return if field_infos[field]
  return if schema.fields[field]

  field_name = field.to_s
  dyn_field = schema.dynamic_fields.keys.find do |pattern|
    dynamic_field_pattern_match?(pattern, field_name)
  end
  unless dyn_field
    # Reported once, after every pattern has been tried.
    puts "Unable to match #{field_name} against a dynamic field pattern"
    return
  end

  dyn_opts = schema.dynamic_fields[dyn_field]
  opts = {}
  if dyn_opts[:index] == :no
    opts[:index] = :no
    opts[:term_vector] = :no
  else
    field_type = schema.field_types[dyn_opts[:field_type]]
    opts[:index] = field_type[:index] if field_type && field_type[:index]
  end
  opts[:store] = :no if dyn_opts[:stored] == :no

  puts "Adding dynamic field: #{field}"
  writer.field_infos.add_field(field, opts)
end

# Internal helper for set_dynamic_field: does +name+ match the dynamic field
# +pattern+? The comparison is a literal prefix/suffix test, so the match is
# anchored and characters such as "." in the pattern carry no special meaning.
def dynamic_field_pattern_match?(pattern, name)
  glob = pattern.to_s
  if glob.start_with?("*")
    name.end_with?(glob[1..-1])
  elsif glob.end_with?("*")
    name.start_with?(glob[0...-1])
  else
    false
  end
end

#set_fields_from_schema ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/cheap_skate/index.rb', line 9

# Adds every schema field the index does not know yet to both the writer's
# and the reader's field infos, and prints a notice when anything was added.
#
# Fields are checked one at a time, so a field added earlier in the same run
# is seen by later checks.
#
# @return [nil]
def set_fields_from_schema
  added_any = false
  schema.field_names.each do |field_name|
    info = schema.field_to_field_info(field_name)
    next if field_infos[info.name]

    self.writer.field_infos << info
    self.reader.field_infos << info
    added_any = true
  end

  if added_any
    puts "Schema has changed"
  end
end