Module: ActsAsFerret

Defined in:
lib/act_methods.rb,
lib/index.rb,
lib/without_ar.rb,
lib/blank_slate.rb,
lib/local_index.rb,
lib/multi_index.rb,
lib/unix_daemon.rb,
lib/bulk_indexer.rb,
lib/rdig_adapter.rb,
lib/remote_index.rb,
lib/class_methods.rb,
lib/ferret_result.rb,
lib/ferret_server.rb,
lib/acts_as_ferret.rb,
lib/more_like_this.rb,
lib/search_results.rb,
lib/instance_methods.rb,
lib/remote_functions.rb,
lib/remote_multi_index.rb,
lib/ferret_find_methods.rb

Overview

:nodoc:

Defined Under Namespace

Modules: ActMethods, ClassMethods, FerretFindMethods, InstanceMethods, MoreLikeThis, RdigAdapter, Remote, RemoteFunctions, ResultAttributes, WithoutAR

Classes: AbstractIndex, ActsAsFerretError, BlankSlate, BulkIndexer, FerretResult, IndexAlreadyDefined, IndexLogger, IndexNotDefined, LocalIndex, MultiIndex, MultiIndexBase, RemoteIndex, RemoteMultiIndex, SearchResults

Constant Summary collapse

DEFAULT_FIELD_OPTIONS =

Default ferret configuration for index fields

{
  :store       => :no, 
  :highlight   => :yes, 
  :index       => :yes, 
  :term_vector => :with_positions_offsets,
  :boost       => 1.0
}
@@multi_indexes =

Global Hash containing all multi indexes created by all classes using the plugin. The key is the concatenation of the alphabetically sorted names of the classes the searcher searches.

Hash.new
@@ferret_indexes =

Global Hash containing the ferret indexes of all classes using the plugin. The key is the index name.

Hash.new
@@index_using_classes =

mapping from class name to index name

{}
@@logger =
Logger.new "#{RAILS_ROOT}/log/acts_as_ferret.log"
@@raise_drb_errors =
false
@@remote =
nil

Class Method Summary collapse

Class Method Details

.append_features(base) ⇒ Object



533
534
535
536
# File 'lib/acts_as_ferret.rb', line 533

# Mixin hook: runs the default +append_features+ behaviour for +base+ and then
# extends +base+ with the ClassMethods module.
#
# Returns the result of <tt>base.extend</tt>.
def self.append_features(base)
  super(base)
  base.extend ClassMethods
end

.build_field_config(fields) ⇒ Object



506
507
508
509
510
511
512
513
514
515
516
# File 'lib/acts_as_ferret.rb', line 506

# Builds the per-field configuration hash for an index definition.
#
# fields:: +nil+/+false+ (no fields), an Array of field names, or a Hash
#          mapping field names to option hashes. Every entry is expanded
#          through +field_config_for+.
#
# Returns a Hash keyed by field name (empty when no fields were given).
# Raises ArgumentError when +fields+ is neither an Array nor a Hash.
def self.build_field_config(fields)
  field_config = {}
  return field_config unless fields

  case fields
  when Array
    fields.each { |name| field_config[name] = field_config_for(name) }
  when Hash
    fields.each { |name, options| field_config[name] = field_config_for(name, options) }
  else
    # The previous InvalidArgumentError is not a Ruby core class, so this
    # branch surfaced as a NameError instead of the intended message.
    raise ArgumentError, ":fields option must be Hash or Array"
  end
  field_config
end

.change_index_dir(name, new_dir) ⇒ Object



394
395
396
# File 'lib/acts_as_ferret.rb', line 394

# Looks up the index registered under +name+ and asks it to switch to the
# directory +new_dir+. Returns whatever the index's +change_index_dir+ returns.
def self.change_index_dir(name, new_dir)
  index = get_index(name)
  index.change_index_dir(new_dir)
end

.close_multi_indexes ⇒ Object



562
563
564
565
566
567
568
569
570
571
572
573
# File 'lib/acts_as_ferret.rb', line 562

# Closes every cached multi index and then empties the cache.
#
# Closing the combined index readers is a precaution: it seems to fix a strange
# test failure where a multi index kept looking at an old version of the
# content_base index.
#
# TODO only close those where necessary (watch inheritance, where the name of
# this class is the base class of a class the cache key is made from).
def self.close_multi_indexes
  multi_indexes.each_value { |index| index.close }
  multi_indexes.clear
end

.combine_conditions(conditions, additional_conditions = []) ⇒ Object

combine our conditions with those given by user, if any



496
497
498
499
500
501
502
503
504
# File 'lib/acts_as_ferret.rb', line 496

# Combines our own conditions with those given by the user, if any.
#
# conditions::            Array whose first element is a SQL fragment (String)
#                         followed by its bind values. It is modified in place.
# additional_conditions:: +nil+, a SQL String, or an Array of a SQL String
#                         followed by bind values. Blank values are ignored.
#
# The user fragment is wrapped in parentheses so that an "or" inside it cannot
# escape the restriction expressed by +conditions+.
#
# Returns +conditions+.
def self.combine_conditions(conditions, additional_conditions = [])
  # work on a copy so the caller's array is left untouched
  cust_opts = additional_conditions.is_a?(Array) ? additional_conditions.dup : [additional_conditions]
  fragment = cust_opts.first
  # String has no #any? on Ruby >= 1.9, so emptiness is checked explicitly
  return conditions if !fragment || (fragment.respond_to?(:empty?) && fragment.empty?)

  logger.debug "cust_opts: #{cust_opts.inspect}"
  cust_opts.shift
  conditions.first << " and (" << fragment << ")"
  conditions.concat(cust_opts)
end

.conditions_for_model(model, conditions = {}) ⇒ Object

check for per-model conditions and return these if provided



428
429
430
431
432
433
434
# File 'lib/acts_as_ferret.rb', line 428

# Picks the conditions that apply to +model+.
#
# When +conditions+ is a Hash it is treated as a per-model lookup table keyed
# by the underscored model name as a Symbol; the matching entry (or +nil+) is
# returned. Any other value is returned unchanged.
def self.conditions_for_model(model, conditions = {})
  return conditions unless conditions.is_a?(Hash)

  conditions[model.name.underscore.to_sym]
end

.create_index_instance(definition) ⇒ Object

creates a new Index instance.



386
387
388
# File 'lib/acts_as_ferret.rb', line 386

# Creates a new index instance for +definition+: a RemoteIndex when +remote?+
# is truthy, a LocalIndex otherwise.
def self.create_index_instance(definition)
  index_class = remote? ? RemoteIndex : LocalIndex
  index_class.new(definition)
end

.define_index(name, options = {}) ⇒ Object

Declares an index.

Use this method to define your indexes in a global initializer (i.e. config/initializers/aaf.rb). This is especially useful if you want to have multiple classes share the same index for cross-model searching as you only need a single call to declare the index for all models.

This method is also used internally to declare an index when you use the acts_as_ferret call inside your class (which in turn can be omitted if the initializer is used). Returns the created index instance.

Options are:

models

Hash of model classes and their per-class option hashes which should use this index. Any models mentioned here will automatically use the index, there is no need to explicitly call acts_as_ferret in the model class definition.



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/acts_as_ferret.rb', line 164

# Declares (or re-declares) the index called +name+ and stores it in
# +ferret_indexes+.
#
# name::    index name; converted to a Symbol.
# options:: Hash merged over the defaults built below. Keys read directly by
#           this method are :ferret, :fields, :additional_fields and :models.
#
# If an index with this name is already registered it is closed and removed,
# and the classes it had registered are registered again on the new instance.
#
# Returns the index instance produced by +create_index_instance+.
def self.define_index(name, options = {})
  name = name.to_sym
  pending_classes = nil
  if ferret_indexes.has_key?(name)
    # seems models have been already loaded. remove that index for now,
    # re-register any already loaded classes later on.
    idx = get_index(name)
    pending_classes = idx.index_definition[:registered_models]
    pending_classes_configs = idx.registered_models_config
    idx.close
    ferret_indexes.delete(name)
  end

  # defaults first; anything given in options overrides them
  index_definition = {
    :index_dir => "#{ActsAsFerret::index_dir}/#{name}",
    :name => name,
    :single_index => false,
    :reindex_batch_size => 1000,
    :ferret => {},
    :ferret_fields => {},             # list of indexed fields that will be filled later
    :enabled => true,                 # used for class-wide disabling of Ferret
    :mysql_fast_batches => true,      # turn off to disable the faster, id based batching mechanism for MySQL
    :raise_drb_errors => false        # handle DRb connection errors by default
  }.update( options )

  # always start with an empty model list, regardless of what options contained
  index_definition[:registered_models] = []
  
  # build ferret configuration
  index_definition[:ferret] = {
    :or_default          => false, 
    :handle_parse_errors => true,
    :default_field       => nil,              # will be set later on
    #:max_clauses => 512,
    #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
    # :wild_card_downcase => true
  }.update( options[:ferret] || {} )

  # remember the default field exactly as configured (nil when none was given)
  index_definition[:user_default_field] = index_definition[:ferret][:default_field]

  # directory handling only happens when the index is not remote
  unless remote?
    ActsAsFerret::ensure_directory index_definition[:index_dir] 
    index_definition[:index_base_dir] = index_definition[:index_dir]
    index_definition[:index_dir] = find_last_index_version(index_definition[:index_dir])
    logger.debug "using index in #{index_definition[:index_dir]}"
  end
  
  # these properties are somewhat vital to the plugin and shouldn't
  # be overwritten by the user:
  index_definition[:ferret].update(
    :key               => :key,
    :path              => index_definition[:index_dir],
    :auto_flush        => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
    :create_if_missing => true
  )

  # field config
  index_definition[:ferret_fields] = build_field_config( options[:fields] )
  index_definition[:ferret_fields].update build_field_config( options[:additional_fields] )

  idx = ferret_indexes[name] = create_index_instance( index_definition )

  # re-register early loaded classes
  if pending_classes
    pending_classes.each { |clazz| idx.register_class clazz, { :force_re_registration => true }.merge(pending_classes_configs[clazz]) }
  end

  # classes listed under :models get WithoutAR mixed in unless they already
  # respond to acts_as_ferret, and are then attached to this index
  if models = options[:models]
    models.each do |clazz, config|
      clazz.send :include, ActsAsFerret::WithoutAR unless clazz.respond_to?(:acts_as_ferret)
      clazz.acts_as_ferret config.merge(:index => name)
    end
  end

  return idx
end

.ensure_directory(dir) ⇒ Object



518
519
520
# File 'lib/acts_as_ferret.rb', line 518

# Creates +dir+ (including missing parent directories) unless the path already
# is a directory or a symlink.
def self.ensure_directory(dir)
  return if File.directory?(dir) || File.symlink?(dir)

  FileUtils.mkdir_p(dir)
end

.ferret_indexes ⇒ Object



104
# File 'lib/acts_as_ferret.rb', line 104

# Reader for the module-wide registry of ferret indexes.
def self.ferret_indexes
  @@ferret_indexes
end

.field_infos(index_definition) ⇒ Object

builds a FieldInfos instance for creation of an index



539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
# File 'lib/acts_as_ferret.rb', line 539

# Builds the Ferret::Index::FieldInfos instance used to create an index.
#
# index_definition:: Hash whose :ferret_fields entry maps field names to
#                    option hashes.
#
# Besides the configured fields, three fixed fields are always added: :key,
# :id and :class_name. Returns the FieldInfos instance.
def self.field_infos(index_definition)
  # attributes applied to fields that do not specify their own
  defaults = { :store => :no, :index => :yes, :term_vector => :no, :boost => 1.0 }
  infos = Ferret::Index::FieldInfos.new(defaults)

  # unique key composed of classname and id
  infos.add_field(:key, :store => :no, :index => :untokenized)
  # primary key
  infos.add_field(:id, :store => :yes, :index => :untokenized)
  # class_name
  infos.add_field(:class_name, :store => :yes, :index => :untokenized)

  # configured fields: work on a copy, dropping :via and any dynamic
  # (Symbol) :boost before handing the options to ferret
  index_definition[:ferret_fields].each_pair do |field, options|
    field_options = options.dup
    field_options.delete_if do |key, value|
      key == :via || (key == :boost && value.is_a?(Symbol))
    end
    infos.add_field(field, field_options)
  end
  infos
end

.filter_include_list_for_model(model, include_options) ⇒ Object



353
354
355
356
357
358
359
360
# File 'lib/acts_as_ferret.rb', line 353

# Reduces an :include option to the associations +model+ actually declares.
#
# model::           object whose +reflections+ Hash is keyed by association
#                   name (Symbol).
# include_options:: +nil+, a single association name, a Hash of nested
#                   includes, or an Array mixing names and Hashes.
#
# Names are kept when they are a key of <tt>model.reflections</tt>. Hashes are
# cut down to the keys that are; a Hash with no known key is dropped.
#
# Returns an Array (empty when nothing applies).
def self.filter_include_list_for_model(model, include_options)
  # Array() would explode a Hash into [key, value] pairs, so wrap it instead
  candidates = include_options.is_a?(Hash) ? [include_options] : Array(include_options)
  return [] if candidates.empty?

  reflections = model.reflections
  candidates.inject([]) do |kept, include_option|
    if include_option.is_a?(Hash)
      known = include_option.reject { |assoc, _| !reflections.has_key?(assoc.to_sym) }
      kept << known unless known.empty?
    elsif reflections.has_key?(include_option.to_sym)
      kept << include_option
    end
    kept
  end
end

.find(query, models_or_index_name, options = {}, ar_options = {}) ⇒ Object

models_or_index_name may be an index name as declared in config/aaf.rb, a single class or an array of classes to limit search to these classes.



306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'lib/acts_as_ferret.rb', line 306

# Searches the index identified by +models_or_index_name+ and wraps the hits
# in a SearchResults object.
#
# query::                passed unchanged to the index's +find_records+.
# models_or_index_name:: an Array of classes, a single Class, or any other
#                        value +find_index+ accepts (in which case no model
#                        list is passed on).
# options::              search options; :limit, :offset, :page, :per_page and
#                        :sort are read here. NOTE: this Hash is modified in
#                        place.
# ar_options::           finder options; :limit and :offset may be removed
#                        from or written to this Hash, and the presence of
#                        :conditions / :order decides where limiting happens.
#
# Returns SearchResults built from the records, the total hit count and the
# resulting :page / :per_page values.
def self.find(query, models_or_index_name, options = {}, ar_options = {})
  # explicit list of models to restrict the search to, nil otherwise
  models = case models_or_index_name
  when Array
    models_or_index_name
  when Class
    [ models_or_index_name ]
  else
    nil
  end
  index = find_index(models_or_index_name)
  multi = (MultiIndexBase === index or index.shared?)
  # without :per_page, fall back to :limit / :offset from ar_options
  # (removing them there) when options does not already carry them
  unless options[:per_page]
    options[:limit] ||= ar_options.delete :limit
    options[:offset] ||= ar_options.delete :offset
  end
  if options[:limit] || options[:per_page]
    # need pagination
    options[:page] = if options[:per_page]
      options[:page] ? options[:page].to_i : 1
    else
      nil
    end
    limit = options[:limit] || options[:per_page]
    offset = options[:offset] || (options[:page] ? (options[:page] - 1) * limit : 0)
    options.delete :offset
    # default to an unlimited index query; the last branch below restores the
    # real limit when the index itself can do the limiting
    options[:limit] = :all
    
    if multi or ((ar_options[:conditions] || ar_options[:order]) && options[:sort])
      # do pagination as the last step after everything has been fetched
      options[:late_pagination] = { :limit => limit, :offset => offset }
    elsif ar_options[:conditions] or ar_options[:order]
      # late limiting in AR call
      unless limit == :all
        ar_options[:limit] = limit
        ar_options[:offset] = offset
      end
    else
      # no AR conditions/order involved: limit and offset go to the index query
      options[:limit] = limit
      options[:offset] = offset
    end
  end
  ActsAsFerret::logger.debug "options: #{options.inspect}\nar_options: #{ar_options.inspect}"
  total_hits, result = index.find_records query, options.merge(:models => models), ar_options
  ActsAsFerret::logger.debug "Query: #{query}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
  SearchResults.new(result, total_hits, options[:page], options[:per_page])
end

.find_ids(query, models_or_index_name, options = {}, &block) ⇒ Object

find ids of records



285
286
287
288
# File 'lib/acts_as_ferret.rb', line 285

# Finds the ids of records matching +query+.
#
# The options are first passed through +add_models_to_options_if_necessary+,
# then the search is delegated to the +find_ids+ method of the index resolved
# from +models_or_index_name+. A given block is handed on to the index.
def self.find_ids(query, models_or_index_name, options = {}, &block)
  search_options = add_models_to_options_if_necessary(options, models_or_index_name)
  index = find_index(models_or_index_name)
  index.find_ids(query, search_options, &block)
end

.find_index(models_or_index_name) ⇒ Object

returns an index instance suitable for searching/updating the named index. Will return a read only MultiIndex when multiple model classes are given that do not share the same physical index.



293
294
295
296
297
298
299
300
301
302
# File 'lib/acts_as_ferret.rb', line 293

# Resolves +models_or_index_name+ to an index instance.
#
# A Symbol or String is treated as an index name and looked up with
# +get_index+ (Strings are converted to Symbols first); anything else is
# handed to +get_index_for+.
def self.find_index(models_or_index_name)
  named = models_or_index_name.is_a?(Symbol) || models_or_index_name.is_a?(String)
  return get_index_for(models_or_index_name) unless named

  get_index(models_or_index_name.to_sym)
end

.find_last_index_version(basedir) ⇒ Object

find the most recent version of an index



399
400
401
402
403
404
405
406
407
408
409
410
411
412
# File 'lib/acts_as_ferret.rb', line 399

# Finds the most recent version of an index below +basedir+.
#
# A versioned index is a sub directory whose name consists of digits,
# optionally followed by "_" and more digits, and which contains a file named
# "segments". Versions are ordered by their numeric components, so "..._10"
# ranks above "..._9" (a plain string sort would order them the other way
# round).
#
# Returns the path of the newest version directory, or +basedir+ itself when
# no versioned index exists.
def self.find_last_index_version(basedir)
  versions = Dir.entries(basedir).select do |entry|
    dir = File.join(basedir, entry)
    entry =~ /\A\d+(_\d+)?\z/ && File.directory?(dir) && File.file?(File.join(dir, 'segments'))
  end
  return basedir if versions.empty?

  # compare numerically; the name itself only breaks ties such as "01" vs "1"
  latest = versions.max_by { |version| [version.split('_').map { |part| part.to_i }, version] }
  File.join(basedir, latest)
end

.get_index(name) ⇒ Object

returns the index with the given name.



266
267
268
269
270
271
272
273
274
275
276
# File 'lib/acts_as_ferret.rb', line 266

# Returns the index registered under +name+.
#
# When the name is unknown and the configuration has not been loaded yet,
# +load_config+ is called; if it returns a truthy value the lookup is retried.
# When the configuration was already loaded, an unknown name raises
# IndexNotDefined. If +load_config+ returns a falsy value the plain registry
# lookup result is returned.
def self.get_index(name)
  name = name.to_sym rescue nil
  return ferret_indexes[name] if ferret_indexes.has_key?(name)

  raise IndexNotDefined.new(name.to_s) if @aaf_config_loaded
  return get_index(name) if load_config

  ferret_indexes[name]
end

.get_index_for(*classes) ⇒ Object

returns the index used by the given class.

If multiple classes are given, either the single index shared by these classes, or a multi index (to be used for search only) across the indexes of all models, is returned.

Raises:

  • (ArgumentError)


367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
# File 'lib/acts_as_ferret.rb', line 367

# Returns the index used by the given class.
#
# classes:: one or more classes (or class names, which are constantized);
#           nested Arrays are flattened.
#
# For a single class the registry is searched for the class itself and then
# for each of its superclasses. For several classes either the single index
# they share or a multi index (search only) across their indexes is returned.
#
# Raises ArgumentError when no class is given and IndexNotDefined when no
# index can be found.
def self.get_index_for(*classes)
  classes.flatten!
  raise ArgumentError.new("no class specified") unless classes.any?
  classes.map!(&:constantize) unless Class === classes.first
  logger.debug "index_for #{classes.inspect}"
  index = if classes.size > 1
    indexes = classes.map { |c| get_index_for c }.uniq
    indexes.size > 1 ? multi_index(indexes) : indexes.first
  else
    registry = index_using_classes
    clazz = classes.first
    clazz = clazz.superclass while clazz && !registry.has_key?(clazz.name)
    # clazz is nil when the whole ancestor chain is unregistered; fall through
    # to the IndexNotDefined below instead of calling #name on nil
    clazz && get_index(registry[clazz.name])
  end
  raise IndexNotDefined.new("no index found for class: #{classes.map(&:name).join(',')}") if index.nil?
  index
end

.index_using_classes ⇒ Object



108
# File 'lib/acts_as_ferret.rb', line 108

# Reader for the module-wide mapping from class name to index name.
def self.index_using_classes
  @@index_using_classes
end

.init_index_basedir ⇒ Object

Sets the default index base directory. By default, all indexes are created under RAILS_ROOT/index/RAILS_ENV. Note that the method body shown below only records this path; it does not itself create the directory.



525
526
527
528
# File 'lib/acts_as_ferret.rb', line 525

# Records the default index base directory, RAILS_ROOT/index/RAILS_ENV, in
# @@index_dir and returns it. Only the path is stored here; nothing is created
# on disk by this method.
def self.init_index_basedir
  @@index_dir = "#{RAILS_ROOT}/index/#{RAILS_ENV}"
end

.load_config ⇒ Object



256
257
258
259
260
261
262
263
# File 'lib/acts_as_ferret.rb', line 256

# Loads RAILS_ROOT/config/aaf.rb and marks the configuration as loaded.
#
# A LoadError is swallowed, in which case +nil+ is returned; otherwise the
# result of the logger call is returned. @aaf_config_loaded is set to true in
# every case.
def self.load_config
  begin
    config_file = "#{RAILS_ROOT}/config/aaf.rb"
    # require_dependency (rather than require) keeps reloading in dev mode working
    require_dependency config_file
    ActsAsFerret::logger.info "loaded configuration file aaf.rb"
  rescue LoadError
    # deliberately ignored
  ensure
    @aaf_config_loaded = true
  end
end

.multi_index(indexes) ⇒ Object

returns a MultiIndex instance operating on a MultiReader



415
416
417
418
419
420
421
422
423
424
425
# File 'lib/acts_as_ferret.rb', line 415

# Returns a multi index spanning +indexes+, creating and caching it on first
# use.
#
# indexes:: Array of index names (Symbols or Strings, decided by the first
#           element) or of index objects responding to +index_name+.
#
# The cache key is the comma-joined, sorted list of index names. A
# RemoteMultiIndex is built when +remote?+ is truthy, a MultiIndex otherwise.
def self.multi_index(indexes)
  first = indexes.first
  if first.is_a?(Symbol) || first.is_a?(String)
    index_names = first.is_a?(Symbol) ? indexes.map { |name| name.to_s } : indexes.dup
    indexes = index_names.map { |name| get_index(name) }
  else
    index_names = indexes.map { |index| index.index_name.to_s }
  end

  cache_key = index_names.sort.join(",")
  ActsAsFerret::multi_indexes[cache_key] ||= begin
    index_class = remote? ? ActsAsFerret::RemoteMultiIndex : ActsAsFerret::MultiIndex
    index_class.new(indexes)
  end
end

.multi_indexes ⇒ Object



99
# File 'lib/acts_as_ferret.rb', line 99

# Reader for the module-wide cache of multi indexes.
def self.multi_indexes
  @@multi_indexes
end

.raise_drb_errors? ⇒ Boolean

Returns:

  • (Boolean)


126
# File 'lib/acts_as_ferret.rb', line 126

# Returns the current value of the module-wide @@raise_drb_errors flag.
def self.raise_drb_errors?
  @@raise_drb_errors
end

.rebuild_index(name) ⇒ Object



390
391
392
# File 'lib/acts_as_ferret.rb', line 390

# Looks up the index registered under +name+ and triggers its +rebuild_index+.
# Returns whatever that call returns.
def self.rebuild_index(name)
  index = get_index(name)
  index.rebuild_index
end

.register_class_with_index(clazz, index_name, options = {}) ⇒ Object

called internally by the acts_as_ferret method

returns the index



243
244
245
246
247
248
249
250
251
252
253
254
# File 'lib/acts_as_ferret.rb', line 243

# Called internally by the acts_as_ferret method: associates +clazz+ with the
# index named +index_name+ and registers the class on that index.
#
# If no index of that name exists yet, one is defined on the fly. In that case
# options[:fields] defaults to all attribute names of a new +clazz+ instance
# (as Symbols); note that this default is written into the given +options+.
#
# Returns the index.
def self.register_class_with_index(clazz, index_name, options = {})
  index_name = index_name.to_sym
  @@index_using_classes[clazz.name] = index_name

  index = ferret_indexes[index_name]
  unless index
    options[:fields] ||= clazz.new.attributes.keys.map(&:to_sym)
    index = define_index(index_name, options)
  end

  index.register_class(clazz, options)
  index
end

.remote? ⇒ Boolean

Returns:

  • (Boolean)


130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/acts_as_ferret.rb', line 130

# Tells whether a remote index server is used.
#
# The decision is made while @@remote is still nil and then kept in @@remote:
# it is false when the FERRET_USE_LOCAL_INDEX environment variable is set or
# ActsAsFerret::Remote::Server.running is truthy; otherwise it is the uri of a
# new ActsAsFerret::Remote::Config, or false if obtaining that raises. The
# outcome is written to the log when it is determined.
def self.remote?
  return @@remote unless @@remote.nil?

  @@remote =
    if ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running
      false
    else
      (ActsAsFerret::Remote::Config.new.uri rescue false)
    end

  if @@remote
    logger.info "Will use remote index server which should be available at #{@@remote}"
  else
    logger.info "Will use local index."
  end
  @@remote
end

.retrieve_records(id_arrays, find_options = {}) ⇒ Object

Retrieves search result records from a data structure like this: { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }

TODO: in case of STI, AR will filter out hits from other classes for us, but this will lead to fewer results being retrieved --> scoping of the ferret query to self.class is still needed. From the ferret mailing list (thanks Curtis Hatter): "I created a method in my base STI class so I can scope my query. For scoping I used something like the following line: query << " role:#‘*’ : self.class" (part of the interpolated expression appears to have been lost when this page was rendered). Though you could make it more generic by simply asking "self.descends_from_active_record?", which is how Rails decides if it should scope your "find" query for STI models. You can check out "base.rb" in activerecord to see that." But maybe it is better to do the scoping in find_ids_with_ferret…



454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
# File 'lib/acts_as_ferret.rb', line 454

# Loads the records for a set of index hits and annotates each record with
# its ferret rank and score.
#
# id_arrays::    iterated as model name => id_array pairs. Each model name is
#                turned into a class via +constantize+; the id_array keys are
#                used as primary key values and each id_array value is
#                unpacked into rank and score (looked up by the record id as
#                a String).
# find_options:: options handed to each model's <tt>find(:all, ...)</tt>.
#                :conditions and :include are adjusted per model; when :order
#                is present the final sort by rank is skipped.
#
# Returns a flat Array with the records of all models.
def self.retrieve_records(id_arrays, find_options = {})
  result = []
  # get objects for each model
  id_arrays.each do |model, id_array|
    next if id_array.empty?
    # logger.debug "id array from index: #{id_array.inspect}"
    
    model_class = model.constantize

    # merge conditions
    # (restrict to the ids found by ferret, then add any user conditions)
    conditions = conditions_for_model model_class, find_options[:conditions]
    conditions = combine_conditions([ "#{model_class.table_name}.#{model_class.primary_key} in (?)", 
                                      id_array.keys ], 
                                    conditions)

    # check for include association that might only exist on some models in case of multi_search
    filtered_include_options = nil
    if include_options = find_options[:include]
      filtered_include_options = filter_include_list_for_model(model_class, include_options)
    end

    # fetch
    tmp_result = model_class.find(:all, find_options.merge(:conditions => conditions, 
                                                           :include    => filtered_include_options))

    # set scores and rank
    tmp_result.each do |record|
      record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
    end
    # merge with result array
    result += tmp_result
  end
  
  # order results as they were found by ferret, unless an AR :order
  # option was given
  # logger.debug "unsorted result: #{result.map{|a| "#{a.id} / #{a.title} / #{a.ferret_rank}"}.inspect}"
  result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
  # logger.debug "sorted result: #{result.map{|a| "#{a.id} / #{a.ferret_rank}"}.inspect}"
  return result
end

.total_hits(query, models_or_index_name, options = {}) ⇒ Object

count hits for a query



279
280
281
282
# File 'lib/acts_as_ferret.rb', line 279

# Counts the hits for +query+.
#
# The options are first passed through +add_models_to_options_if_necessary+,
# then the count is delegated to the +total_hits+ method of the index resolved
# from +models_or_index_name+.
def self.total_hits(query, models_or_index_name, options = {})
  search_options = add_models_to_options_if_necessary(options, models_or_index_name)
  index = find_index(models_or_index_name)
  index.total_hits(query, search_options)
end