Module: ActsAsFerret

Defined in:
lib/act_methods.rb,
lib/index.rb,
lib/without_ar.rb,
lib/blank_slate.rb,
lib/local_index.rb,
lib/multi_index.rb,
lib/unix_daemon.rb,
lib/bulk_indexer.rb,
lib/rdig_adapter.rb,
lib/remote_index.rb,
lib/class_methods.rb,
lib/ferret_result.rb,
lib/ferret_server.rb,
lib/acts_as_ferret.rb,
lib/more_like_this.rb,
lib/search_results.rb,
lib/instance_methods.rb,
lib/remote_functions.rb,
lib/remote_multi_index.rb,
lib/ferret_find_methods.rb

Overview

:nodoc:

Defined Under Namespace

Modules: ActMethods, ClassMethods, FerretFindMethods, InstanceMethods, MoreLikeThis, RdigAdapter, Remote, RemoteFunctions, ResultAttributes, WithoutAR

Classes: AbstractIndex, ActsAsFerretError, BlankSlate, BulkIndexer, FerretResult, IndexAlreadyDefined, IndexLogger, IndexNotDefined, LocalIndex, MultiIndex, MultiIndexBase, RemoteIndex, RemoteMultiIndex, SearchResults

Constant Summary collapse

DEFAULT_FIELD_OPTIONS =

Default ferret configuration for index fields

{
  :store       => :no, 
  :highlight   => :yes, 
  :index       => :yes, 
  :term_vector => :with_positions_offsets,
  :boost       => 1.0
}
@@multi_indexes =

Global Hash containing all multi indexes created by all classes using the plugin. The key is the comma-joined, alphabetically sorted list of the names of the indexes the multi index searches.

Hash.new
@@ferret_indexes =

Global Hash containing the ferret indexes of all classes using the plugin. The key is the index name.

Hash.new
@@index_using_classes =

mapping from class name to index name

{}
@@logger =
Logger.new "#{RAILS_ROOT}/log/acts_as_ferret.log"
@@raise_drb_errors =
false
@@remote =
nil

Class Method Summary collapse

Class Method Details

.append_features(base) ⇒ Object



525
526
527
528
# File 'lib/acts_as_ferret.rb', line 525

# Hook invoked when this module is mixed into +base+: runs the inherited
# append_features behaviour first, then extends +base+ with ClassMethods.
def self.append_features(base)
  super(base)
  base.extend ClassMethods
end

.build_field_config(fields) ⇒ Object



498
499
500
501
502
503
504
505
506
507
508
# File 'lib/acts_as_ferret.rb', line 498

# Builds the field configuration hash for an index definition.
#
# fields - nil/false (no fields, yields an empty config), an Array of field
#          names (each configured through field_config_for with its default
#          options), or a Hash mapping field names to per-field options.
#
# Returns a Hash mapping each field name to the value returned by
# field_config_for.
# Raises ArgumentError if +fields+ is given but is neither an Array nor a Hash.
# (The previous code referenced an undefined InvalidArgumentError constant and
# therefore failed with a NameError instead of the intended message.)
def self.build_field_config(fields)
  field_config = {}
  return field_config unless fields

  case fields
  when Array
    fields.each { |name| field_config[name] = field_config_for(name) }
  when Hash
    fields.each { |name, options| field_config[name] = field_config_for(name, options) }
  else
    raise ArgumentError, ":fields option must be Hash or Array"
  end
  field_config
end

.change_index_dir(name, new_dir) ⇒ Object



389
390
391
# File 'lib/acts_as_ferret.rb', line 389

# Looks up the index registered under +name+ and asks it to switch to
# +new_dir+. Returns whatever the index's change_index_dir returns.
def self.change_index_dir(name, new_dir)
  index = get_index(name)
  index.change_index_dir(new_dir)
end

.close_multi_indexes ⇒ Object



554
555
556
557
558
559
560
561
562
563
564
565
# File 'lib/acts_as_ferret.rb', line 554

# Closes every cached multi index and empties the cache.
#
# Closing the combined index readers is a precaution: it seems to fix a
# strange test failure where a multi_index kept looking at an old version of
# the content_base index.
#
# TODO only close those where necessary (watch inheritance, where self.name
# is the base class of a class the cache key is made from).
def self.close_multi_indexes
  multi_indexes.each_value(&:close)
  multi_indexes.clear
end

.combine_conditions(conditions, additional_conditions = []) ⇒ Object

combine our conditions with those given by user, if any



487
488
489
490
491
492
493
494
495
496
# File 'lib/acts_as_ferret.rb', line 487

# Combines our own conditions with those given by the user, if any.
#
# conditions            - Array whose first element is a SQL fragment String,
#                         followed by its bind values. It is modified in place.
# additional_conditions - nil, a SQL String, or an Array of
#                         [sql_fragment, *bind_values].
#
# The additional fragment is appended to the first element with " and ", and
# any additional bind values are appended to +conditions+.
#
# A plain String is accepted as well: String has no #any? on Ruby >= 1.9, so
# the presence check is done per type. The method also no longer relies on
# ActiveSupport's `returning` helper.
#
# Returns the (mutated) +conditions+ array.
def self.combine_conditions(conditions, additional_conditions = [])
  given =
    if additional_conditions.is_a?(Array)
      additional_conditions.any?
    else
      additional_conditions &&
        !(additional_conditions.respond_to?(:empty?) && additional_conditions.empty?)
    end
  return conditions unless given

  cust_opts = additional_conditions.is_a?(Array) ? additional_conditions.dup : [additional_conditions]
  logger.debug "cust_opts: #{cust_opts.inspect}"
  # first entry is the SQL fragment, the rest are its bind values
  conditions.first << " and " << cust_opts.shift
  conditions.concat(cust_opts)
  conditions
end

.conditions_for_model(model, conditions = {}) ⇒ Object

check for per-model conditions and return these if provided



423
424
425
426
427
428
429
# File 'lib/acts_as_ferret.rb', line 423

# Resolves per-model conditions.
#
# When +conditions+ is a Hash it is treated as a map keyed by the underscored
# model name (as a Symbol) and the entry for +model+ is returned (nil when
# absent). Any other value is returned unchanged.
def self.conditions_for_model(model, conditions = {})
  return conditions unless Hash === conditions

  conditions[model.name.underscore.to_sym]
end

.create_index_instance(definition) ⇒ Object

creates a new Index instance.



381
382
383
# File 'lib/acts_as_ferret.rb', line 381

# Creates a new index instance for +definition+: a RemoteIndex when remote?
# is truthy, otherwise a LocalIndex.
def self.create_index_instance(definition)
  index_class = remote? ? RemoteIndex : LocalIndex
  index_class.new(definition)
end

.define_index(name, options = {}) ⇒ Object

Globally declares an index.

This method is also used to implicitly declare an index when you use the acts_as_ferret call in your class. Returns the created index instance.

Options are:

models

Hash of model classes and their per-class option hashes which should use this index. Any models mentioned here will automatically use the index, there is no need to explicitly call acts_as_ferret in the model class definition.



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/acts_as_ferret.rb', line 159

# Globally declares the index +name+ and returns the created index instance.
#
# name    - index name; converted to a Symbol.
# options - Hash merged over the defaults assembled below. Keys read directly
#           by this method:
#           :ferret            - overrides for the ferret configuration hash
#           :fields            - passed to build_field_config
#           :additional_fields - passed to build_field_config and merged on
#                                top of the :fields result
#           :models            - enumerable of [class, per-class options];
#                                acts_as_ferret is called on each class with
#                                :index => name merged into its options
#
# If an index with the same name is already registered, it is closed and
# removed first, and the models it had registered are re-registered on the
# new instance.
def self.define_index(name, options = {})
  name = name.to_sym
  pending_classes = nil
  if ferret_indexes.has_key?(name)
    # seems models have been already loaded. remove that index for now,
    # re-register any already loaded classes later on.
    idx = get_index(name)
    pending_classes = idx.index_definition[:registered_models]
    pending_classes_configs = idx.registered_models_config
    idx.close
    ferret_indexes.delete(name)
  end

  # defaults; anything in +options+ wins
  index_definition = {
    :index_dir => "#{ActsAsFerret::index_dir}/#{name}",
    :name => name,
    :single_index => false,
    :reindex_batch_size => 1000,
    :ferret => {},
    :ferret_fields => {},             # list of indexed fields that will be filled later
    :enabled => true,                 # used for class-wide disabling of Ferret
    :mysql_fast_batches => true,      # turn off to disable the faster, id based batching mechanism for MySQL
    :raise_drb_errors => false        # handle DRb connection errors by default
  }.update( options )

  # always start with an empty list, even if the caller passed one in
  index_definition[:registered_models] = []
  
  # build ferret configuration
  index_definition[:ferret] = {
    :or_default          => false, 
    :handle_parse_errors => true,
    :default_field       => nil,              # will be set later on
    #:max_clauses => 512,
    #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
    # :wild_card_downcase => true
  }.update( options[:ferret] || {} )

  # remember the :default_field the caller asked for (nil if none was given)
  index_definition[:user_default_field] = index_definition[:ferret][:default_field]

  # local mode only: make sure the directory exists and switch to the latest
  # versioned sub directory, keeping the original path as :index_base_dir
  unless remote?
    ActsAsFerret::ensure_directory index_definition[:index_dir] 
    index_definition[:index_base_dir] = index_definition[:index_dir]
    index_definition[:index_dir] = find_last_index_version(index_definition[:index_dir])
    logger.debug "using index in #{index_definition[:index_dir]}"
  end
  
  # these properties are somewhat vital to the plugin and shouldn't
  # be overwritten by the user:
  index_definition[:ferret].update(
    :key               => :key,
    :path              => index_definition[:index_dir],
    :auto_flush        => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
    :create_if_missing => true
  )

  # field config
  index_definition[:ferret_fields] = build_field_config( options[:fields] )
  index_definition[:ferret_fields].update build_field_config( options[:additional_fields] )

  # instantiate the index and register it under its name
  idx = ferret_indexes[name] = create_index_instance( index_definition )

  # re-register early loaded classes
  if pending_classes
    pending_classes.each { |clazz| idx.register_class clazz, { :force_re_registration => true }.merge(pending_classes_configs[clazz]) }
  end

  # models named in the options use this index without an explicit
  # acts_as_ferret call in their class body; classes that do not respond to
  # acts_as_ferret get the WithoutAR module included first
  if models = options[:models]
    models.each do |clazz, config|
      clazz.send :include, ActsAsFerret::WithoutAR unless clazz.respond_to?(:acts_as_ferret)
      clazz.acts_as_ferret config.merge(:index => name)
    end
  end

  return idx
end

.ensure_directory(dir) ⇒ Object



510
511
512
# File 'lib/acts_as_ferret.rb', line 510

# Creates +dir+ (including missing parents) unless it already exists as a
# directory or as a symlink.
def self.ensure_directory(dir)
  return if File.directory?(dir) || File.symlink?(dir)

  FileUtils.mkdir_p(dir)
end

.ferret_indexes ⇒ Object



104
# File 'lib/acts_as_ferret.rb', line 104

# Accessor for the module-wide @@ferret_indexes registry.
def self.ferret_indexes
  @@ferret_indexes
end

.field_infos(index_definition) ⇒ Object

builds a FieldInfos instance for creation of an index



531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
# File 'lib/acts_as_ferret.rb', line 531

# Builds the Ferret::Index::FieldInfos instance used when creating an index.
#
# index_definition - Hash whose :ferret_fields entry maps field names to
#                    their option hashes.
#
# Returns the populated FieldInfos object.
def self.field_infos(index_definition)
  # default attributes for fields
  infos = Ferret::Index::FieldInfos.new(:store => :no,
                                        :index => :yes,
                                        :term_vector => :no,
                                        :boost => 1.0)

  # built-in untokenized fields: the unique key composed of classname and id
  # (not stored), the primary key and the class name (both stored)
  [[:key, :no], [:id, :yes], [:class_name, :yes]].each do |field_name, stored|
    infos.add_field(field_name, :store => stored, :index => :untokenized)
  end

  # other fields
  index_definition[:ferret_fields].each do |field, field_options|
    settings = field_options.dup          # leave the definition's hash untouched
    settings.delete(:via)
    settings.delete(:boost) if Symbol === settings[:boost] # dynamic boost
    infos.add_field(field, settings)
  end

  infos
end

.filter_include_list_for_model(model, include_options) ⇒ Object



348
349
350
351
352
353
354
355
# File 'lib/acts_as_ferret.rb', line 348

# Filters an ActiveRecord-style :include option down to the entries that
# +model+ actually has a reflection for.
#
# model           - object responding to #reflections (a Hash keyed by
#                   association name Symbols, as queried via has_key? below).
# include_options - nil, a single association name, a single Hash, or an
#                   Array of names and/or Hashes. For a Hash entry only its
#                   first key is checked against the reflections.
#
# A single Hash is wrapped explicitly: Kernel#Array would turn it into an
# array of [key, value] pairs, and calling to_sym on such a pair raises
# NoMethodError.
#
# Returns a new Array with the accepted entries.
def self.filter_include_list_for_model(model, include_options)
  candidates =
    case include_options
    when Hash then include_options.empty? ? [] : [include_options]
    else Array(include_options)
    end

  candidates.select do |include_option|
    association = include_option.is_a?(Hash) ? include_option.keys.first : include_option
    model.reflections.has_key?(association.to_sym)
  end
end

.find(query, models_or_index_name, options = {}, ar_options = {}) ⇒ Object

models_or_index_name may be an index name as declared in config/aaf.rb, a single class or an array of classes to limit search to these classes.



301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
# File 'lib/acts_as_ferret.rb', line 301

# Runs +query+ against the index selected by +models_or_index_name+ and
# returns a SearchResults object.
#
# query                - passed through to the index's find_records.
# models_or_index_name - an index name, a single Class, or an Array of
#                        classes. Classes are additionally passed on as the
#                        :models option; for anything else :models is nil.
# options              - search options; this method reads :per_page, :page,
#                        :limit, :offset and :sort.
# ar_options           - finder options; this method reads :limit, :offset,
#                        :conditions and :order.
#
# NOTE: both +options+ and +ar_options+ are modified in place (keys are
# set and deleted below).
def self.find(query, models_or_index_name, options = {}, ar_options = {})
  models = case models_or_index_name
  when Array
    models_or_index_name
  when Class
    [ models_or_index_name ]
  else
    nil
  end
  index = find_index(models_or_index_name)
  # true for a MultiIndexBase instance or an index reporting shared?
  multi = (MultiIndexBase === index or index.shared?)
  # without :per_page, fall back to :limit / :offset from ar_options
  # (removing them there) when not given in options
  unless options[:per_page]
    options[:limit] ||= ar_options.delete :limit
    options[:offset] ||= ar_options.delete :offset
  end
  if options[:limit] || options[:per_page]
    # need pagination
    # :page is only meaningful together with :per_page (defaults to 1)
    options[:page] = if options[:per_page]
      options[:page] ? options[:page].to_i : 1
    else
      nil
    end
    limit = options[:limit] || options[:per_page]
    offset = options[:offset] || (options[:page] ? (options[:page] - 1) * limit : 0)
    options.delete :offset
    # default to :all here; limit and offset are re-applied below where the
    # chosen strategy needs them
    options[:limit] = :all
    
    if multi or ((ar_options[:conditions] || ar_options[:order]) && options[:sort])
      # do pagination as the last step after everything has been fetched
      options[:late_pagination] = { :limit => limit, :offset => offset }
    elsif ar_options[:conditions] or ar_options[:order]
      # late limiting in AR call
      unless limit == :all
        ar_options[:limit] = limit
        ar_options[:offset] = offset
      end
    else
      # no AR conditions/order: pass limit and offset in the search options
      options[:limit] = limit
      options[:offset] = offset
    end
  end
  ActsAsFerret::logger.debug "options: #{options.inspect}\nar_options: #{ar_options.inspect}"
  total_hits, result = index.find_records query, options.merge(:models => models), ar_options
  ActsAsFerret::logger.debug "Query: #{query}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
  SearchResults.new(result, total_hits, options[:page], options[:per_page])
end

.find_ids(query, models_or_index_name, options = {}, &block) ⇒ Object

find ids of records



280
281
282
283
# File 'lib/acts_as_ferret.rb', line 280

# Finds ids of records matching +query+.
#
# The options are first passed through add_models_to_options_if_necessary,
# then the call (including an optional block) is delegated to the index
# resolved by find_index.
def self.find_ids(query, models_or_index_name, options = {}, &block)
  search_options = add_models_to_options_if_necessary(options, models_or_index_name)
  index = find_index(models_or_index_name)
  index.find_ids(query, search_options, &block)
end

.find_index(models_or_index_name) ⇒ Object

returns an index instance suitable for searching/updating the named index. Will return a read only MultiIndex when multiple model classes are given that do not share the same physical index.



288
289
290
291
292
293
294
295
296
297
# File 'lib/acts_as_ferret.rb', line 288

# Returns an index instance suitable for searching/updating.
#
# A Symbol or String is treated as an index name and resolved via get_index
# (as a Symbol); anything else is handed to get_index_for.
def self.find_index(models_or_index_name)
  case models_or_index_name
  when Symbol, String
    get_index(models_or_index_name.to_sym)
  else
    get_index_for(models_or_index_name)
  end
end

.find_last_index_version(basedir) ⇒ Object

find the most recent version of an index



394
395
396
397
398
399
400
401
402
403
404
405
406
407
# File 'lib/acts_as_ferret.rb', line 394

# Finds the most recent version of an index below +basedir+.
#
# A version is a sub directory whose name is all digits, optionally followed
# by "_<digits>", and which contains a file named 'segments'.
#
# Versions are compared numerically, part by part. A plain string sort would
# rank "..._10" before "..._2" and pick a stale index.
#
# Returns the path of the newest version directory, or +basedir+ itself when
# no versioned index exists.
def self.find_last_index_version(basedir)
  # check for versioned index
  versions = Dir.entries(basedir).select do |entry|
    dir = File.join(basedir, entry)
    entry =~ /\A\d+(_\d+)?\z/ && File.directory?(dir) && File.file?(File.join(dir, 'segments'))
  end
  return basedir if versions.empty?

  # select latest version
  latest = versions.max_by { |version| version.split('_').map { |part| part.to_i } }
  File.join(basedir, latest)
end

.get_index(name) ⇒ Object

returns the index with the given name.



261
262
263
264
265
266
267
268
269
270
271
# File 'lib/acts_as_ferret.rb', line 261

# Returns the index registered under +name+.
#
# If the name is unknown and the configuration has not been loaded yet,
# load_config is called and, when it returns a truthy value, the lookup is
# retried. If load_config returns a falsy value the plain registry lookup
# result is returned.
#
# Raises IndexNotDefined if the name is unknown and the configuration has
# already been loaded.
def self.get_index(name)
  name = begin
    name.to_sym
  rescue StandardError
    nil
  end

  return ferret_indexes[name] if ferret_indexes.has_key?(name)
  raise IndexNotDefined.new(name.to_s) if @aaf_config_loaded
  return get_index(name) if load_config

  ferret_indexes[name]
end

.get_index_for(*classes) ⇒ Object

returns the index used by the given class.

If multiple classes are given, either the single index shared by these classes, or a multi index (to be used for search only) across the indexes of all models, is returned.

Raises:

  • (ArgumentError)


362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
# File 'lib/acts_as_ferret.rb', line 362

# Returns the index used by the given class.
#
# classes - one or more classes (or arrays of classes). If the first entry
#           is not a Class, every entry is converted via #constantize.
#
# For a single class, the class and then its superclasses are looked up in
# the class-name => index-name mapping. For multiple classes, either the
# single index shared by them or a multi index (built by multi_index, for
# searching only) across all their indexes is returned.
#
# Raises ArgumentError if no class is given.
# Raises IndexNotDefined if no index can be found. This includes the case
# where neither the class nor any superclass is registered, which used to
# surface as a NoMethodError on nil.
def self.get_index_for(*classes)
  classes.flatten!
  raise ArgumentError.new("no class specified") unless classes.any?
  classes.map!(&:constantize) unless Class === classes.first
  logger.debug "index_for #{classes.inspect}"
  index = if classes.size > 1
    indexes = classes.map { |c| get_index_for c }.uniq
    indexes.size > 1 ? multi_index(indexes) : indexes.first
  else
    clazz = classes.first
    # walk up the inheritance chain until a registered class is found;
    # clazz ends up nil when the whole chain is unregistered
    clazz = clazz.superclass while clazz && !index_using_classes.has_key?(clazz.name)
    get_index(index_using_classes[clazz.name]) if clazz
  end
  raise IndexNotDefined.new("no index found for class: #{classes.map(&:name).join(',')}") if index.nil?
  index
end

.index_using_classes ⇒ Object



108
# File 'lib/acts_as_ferret.rb', line 108

# Accessor for the module-wide @@index_using_classes mapping.
def self.index_using_classes
  @@index_using_classes
end

.init_index_basedir ⇒ Object

Sets the default index base dir to RAILS_ROOT/index/RAILS_ENV, under which all indexes are created by default. Note that the method shown below only records this path; it does not create the directory itself.



517
518
519
520
# File 'lib/acts_as_ferret.rb', line 517

# Records the default index base directory, RAILS_ROOT/index/RAILS_ENV, in
# @@index_dir and returns that path. Nothing is created on disk here.
def self.init_index_basedir
  @@index_dir = "#{RAILS_ROOT}/index/#{RAILS_ENV}"
end

.load_config ⇒ Object



251
252
253
254
255
256
257
258
# File 'lib/acts_as_ferret.rb', line 251

# Loads RAILS_ROOT/config/aaf.rb, if present.
#
# A LoadError is swallowed on purpose, and in that case nil is returned.
# Either way @aaf_config_loaded is set to true afterwards.
def self.load_config
  begin
    config_file = "#{RAILS_ROOT}/config/aaf.rb"
    # using require_dependency to make the reloading in dev mode work.
    require_dependency config_file
    ActsAsFerret::logger.info "loaded configuration file aaf.rb"
  rescue LoadError
    nil
  ensure
    @aaf_config_loaded = true
  end
end

.multi_index(indexes) ⇒ Object

returns a MultiIndex instance operating on a MultiReader



410
411
412
413
414
415
416
417
418
419
420
# File 'lib/acts_as_ferret.rb', line 410

# Returns a (cached) multi index across +indexes+.
#
# indexes - Array of index names (Symbols or Strings, decided by the first
#           element) or of index objects responding to #index_name.
#
# The cache key is the sorted index names joined by ",". On a cache miss a
# RemoteMultiIndex (when remote? is truthy) or a MultiIndex is created and
# stored in ActsAsFerret::multi_indexes.
def self.multi_index(indexes)
  names = indexes.dup
  names = names.map { |n| n.to_s } if Symbol === names.first

  if String === names.first
    # names were given: resolve them to index instances
    indexes = names.map { |n| get_index(n) }
  else
    # index instances were given: derive their names
    names = names.map { |idx| idx.index_name.to_s }
  end

  cache_key = names.sort.join(",")
  ActsAsFerret::multi_indexes[cache_key] ||= begin
    index_class = remote? ? ActsAsFerret::RemoteMultiIndex : ActsAsFerret::MultiIndex
    index_class.new(indexes)
  end
end

.multi_indexes ⇒ Object



99
# File 'lib/acts_as_ferret.rb', line 99

# Accessor for the module-wide @@multi_indexes cache.
def self.multi_indexes
  @@multi_indexes
end

.raise_drb_errors? ⇒ Boolean

Returns:

  • (Boolean)


126
# File 'lib/acts_as_ferret.rb', line 126

# Returns the current value of the module-wide @@raise_drb_errors flag.
def self.raise_drb_errors?
  @@raise_drb_errors
end

.rebuild_index(name) ⇒ Object



385
386
387
# File 'lib/acts_as_ferret.rb', line 385

# Looks up the index registered under +name+ and triggers its rebuild.
# Returns whatever the index's rebuild_index returns.
def self.rebuild_index(name)
  index = get_index(name)
  index.rebuild_index
end

.register_class_with_index(clazz, index_name, options = {}) ⇒ Object

called internally by the acts_as_ferret method

returns the index



238
239
240
241
242
243
244
245
246
247
248
249
# File 'lib/acts_as_ferret.rb', line 238

# Called internally by the acts_as_ferret method.
#
# Records +clazz+ (by name) as a user of the index +index_name+ and registers
# it with that index. If no such index exists yet it is defined on the fly;
# in that case :fields defaults to all attribute names of a new +clazz+
# instance. Note that +options+ may be modified (the :fields default is
# written into it).
#
# Returns the index.
def self.register_class_with_index(clazz, index_name, options = {})
  index_name = index_name.to_sym
  @@index_using_classes[clazz.name] = index_name

  index = ferret_indexes[index_name]
  unless index
    # index definition on the fly
    # default to all attributes of this class
    options[:fields] ||= clazz.new.attributes.keys.map(&:to_sym)
    index = define_index(index_name, options)
  end

  index.register_class(clazz, options)
  index
end

.remote? ⇒ Boolean

Returns:

  • (Boolean)


130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/acts_as_ferret.rb', line 130

# Tells whether a remote index server is used.
#
# The decision is cached in @@remote once it is non-nil. Local mode is chosen
# when ENV["FERRET_USE_LOCAL_INDEX"] is set or
# ActsAsFerret::Remote::Server.running is truthy. Otherwise the uri from
# ActsAsFerret::Remote::Config is used, falling back to false if obtaining it
# raises.
#
# Returns the cached value: the remote uri, false, or nil.
def self.remote?
  return @@remote unless @@remote.nil?

  force_local = ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running
  @@remote = if force_local
    false
  else
    begin
      ActsAsFerret::Remote::Config.new.uri
    rescue StandardError
      false
    end
  end

  if @@remote
    logger.info "Will use remote index server which should be available at #{@@remote}"
  else
    logger.info "Will use local index."
  end
  @@remote
end

.retrieve_records(id_arrays, find_options = {}) ⇒ Object

Retrieves search result records from a data structure like this: { ‘Model1’ => { ‘1’ => [ rank, score ], ‘2’ => [ rank, score ] } }

TODO: in case of STI AR will filter out hits from other classes for us, but this will lead to less results retrieved –> scoping of ferret query to self.class is still needed. from the ferret ML (thanks Curtis Hatter) > I created a method in my base STI class so I can scope my query. For scoping > I used something like the following line: > > query << “ role:#‘*’ : self.class” > > Though you could make it more generic by simply asking > “self.descends_from_active_record?” which is how rails decides if it should > scope your “find” query for STI models. You can check out “base.rb” in > activerecord to see that. but maybe better do the scoping in find_ids_with_ferret…



449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
# File 'lib/acts_as_ferret.rb', line 449

# Retrieves the records for a set of search hits.
#
# id_arrays    - enumerable of [model name, hits] pairs, where hits maps each
#                record id (as a String) to [rank, score].
# find_options - finder options passed on to each model's find(:all, ...).
#                :conditions, :include and :order are also read here.
#
# Returns an Array with the records of all models. Each record gets
# ferret_rank and ferret_score assigned. The records are sorted by
# ferret_rank unless an :order option was given.
def self.retrieve_records(id_arrays, find_options = {})
  result = []
  # get objects for each model
  id_arrays.each do |model, id_array|
    next if id_array.empty?
    model_class = model.constantize

    # merge conditions
    # (per-model user conditions are and-ed onto the primary key restriction)
    conditions = conditions_for_model model_class, find_options[:conditions]
    conditions = combine_conditions([ "#{model_class.table_name}.#{model_class.primary_key} in (?)", 
                                      id_array.keys ], 
                                    conditions)

    # check for include association that might only exist on some models in case of multi_search
    filtered_include_options = nil
    if include_options = find_options[:include]
      filtered_include_options = filter_include_list_for_model(model_class, include_options)
    end

    # fetch
    tmp_result = model_class.find(:all, find_options.merge(:conditions => conditions, 
                                                           :include    => filtered_include_options))

    # set scores and rank
    # (hits are keyed by the record id as a String)
    tmp_result.each do |record|
      record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
    end
    # merge with result array
    result += tmp_result
  end
  
  # order results as they were found by ferret, unless an AR :order
  # option was given
  result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
  return result
end

.total_hits(query, models_or_index_name, options = {}) ⇒ Object

count hits for a query



274
275
276
277
# File 'lib/acts_as_ferret.rb', line 274

# Counts hits for a query.
#
# The options are first passed through add_models_to_options_if_necessary,
# then the call is delegated to the index resolved by find_index.
def self.total_hits(query, models_or_index_name, options = {})
  search_options = add_models_to_options_if_necessary(options, models_or_index_name)
  index = find_index(models_or_index_name)
  index.total_hits(query, search_options)
end