Class: ActsAsIndexed::SearchAtom
- Inherits:
-
Object
- Object
- ActsAsIndexed::SearchAtom
- Defined in:
- lib/acts_as_indexed/search_atom.rb
Instance Attribute Summary collapse
-
#records ⇒ Object
readonly
Contains a hash of records.
Instance Method Summary collapse
-
#+(other) ⇒ Object
Creates a new SearchAtom with the combined records from self and other.
-
#-(other) ⇒ Object
Creates a new SearchAtom with records in other removed from self.
-
#add_position(record_id, pos) ⇒ Object
Adds
pos
to the array of positions for record_id
. -
#add_record(record_id) ⇒ Object
Adds
record_id
to the stored records. -
#include_record?(record_id) ⇒ Boolean
Returns true if the given record is present.
-
#initialize(records = ActiveSupport::OrderedHash.new) ⇒ SearchAtom
constructor
A new instance of SearchAtom.
-
#positions(record_id) ⇒ Object
Returns an array of positions for
record_id
stored in this Atom. -
#preceded_by(former) ⇒ Object
Returns an atom containing the records and positions of
self
preceded by former
. For example: “former latter” or “big dog”, where “big” is the former and “dog” is the latter. -
#record_ids ⇒ Object
Returns all record IDs stored in this Atom.
-
#remove_record(record_id) ⇒ Object
Removes
record_id
from this Atom. -
#weightings(records_size) ⇒ Object
Returns a hash of record_ids and weightings for each record in the atom.
Constructor Details
#initialize(records = ActiveSupport::OrderedHash.new) ⇒ SearchAtom
Returns a new instance of SearchAtom.
14 15 16 |
# File 'lib/acts_as_indexed/search_atom.rb', line 14 def initialize(records=ActiveSupport::OrderedHash.new) @records = records end |
Instance Attribute Details
#records ⇒ Object (readonly)
Contains a hash of records: { 'record_id' => [pos1, pos2, pos] }. Weighting (see www.perlmonks.com/index.pl?node_id=27509): $W(T, D) = \mathrm{tf}(T, D) \cdot \log\left(DN / \mathrm{df}(T)\right)$, i.e. weighting = frequency_in_this_record * log(total_number_of_records / number_of_matching_records).
12 13 14 |
# File 'lib/acts_as_indexed/search_atom.rb', line 12 def records @records end |
Instance Method Details
#+(other) ⇒ Object
Creates a new SearchAtom with the combined records from self and other.
50 51 52 53 54 |
# File 'lib/acts_as_indexed/search_atom.rb', line 50 def +(other) SearchAtom.new(@records.clone.merge!(other.records) { |key, _old, _new| _old + _new }) end |
#-(other) ⇒ Object
Creates a new SearchAtom with records in other removed from self.
57 58 59 60 |
# File 'lib/acts_as_indexed/search_atom.rb', line 57 def -(other) records = @records.clone.reject { |name, records| other.records.include?(name) } SearchAtom.new(records) end |
#add_position(record_id, pos) ⇒ Object
Adds pos
to the array of positions for record_id
.
29 30 31 32 |
# File 'lib/acts_as_indexed/search_atom.rb', line 29 def add_position(record_id, pos) add_record(record_id) @records[record_id] << pos end |
#add_record(record_id) ⇒ Object
Adds record_id
to the stored records.
24 25 26 |
# File 'lib/acts_as_indexed/search_atom.rb', line 24 def add_record(record_id) @records[record_id] = [] unless include_record?(record_id) end |
#include_record?(record_id) ⇒ Boolean
Returns true if the given record is present.
19 20 21 |
# File 'lib/acts_as_indexed/search_atom.rb', line 19 def include_record?(record_id) @records.include?(record_id) end |
#positions(record_id) ⇒ Object
Returns an array of positions for record_id
stored in this Atom.
40 41 42 |
# File 'lib/acts_as_indexed/search_atom.rb', line 40 def positions(record_id) @records[record_id] end |
#preceded_by(former) ⇒ Object
Returns an atom containing the records and positions of self
preceded by former
. For example: “former latter” or “big dog”, where “big” is the former and “dog” is the latter.
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/acts_as_indexed/search_atom.rb', line 64 def preceded_by(former) matches = SearchAtom.new latter = ActiveSupport::OrderedHash.new former.record_ids.each do |rid| latter[rid] = @records[rid] if @records[rid] end # Iterate over each record in latter. latter.each do |record_id,pos| # Iterate over each position. pos.each do |p| # Check if previous position is in former. if former.include_position?(record_id,p-1) matches.add_record(record_id) unless matches.include_record?(record_id) matches.add_position(record_id,p) end end end matches end |
#record_ids ⇒ Object
Returns all record IDs stored in this Atom.
35 36 37 |
# File 'lib/acts_as_indexed/search_atom.rb', line 35 def record_ids @records.keys end |
#remove_record(record_id) ⇒ Object
Removes record_id
from this Atom.
45 46 47 |
# File 'lib/acts_as_indexed/search_atom.rb', line 45 def remove_record(record_id) @records.delete(record_id) end |
#weightings(records_size) ⇒ Object
Returns a hash of record_ids and weightings for each record in the atom.
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/acts_as_indexed/search_atom.rb', line 88 def weightings(records_size) out = ActiveSupport::OrderedHash.new ## phurni 2012-09-21 when records_size is exactly the @records.size (all records are matches), the Math.log would ## return 0 which means the frequency (pos.size) will have no effect. Cheat to make it like the matching ## record is one less, so that we still can weight on frequency. matching_records_size = (records_size == @records.size ? @records.size - 1 : @records.size) @records.each do |r_id, pos| # Fixes a bug when the records_size is zero. i.e. The only record # contaning the word has been deleted. if records_size < 1 out[r_id] = 0.0 next end # weighting = frequency * log (records.size / records_with_atom) ## parndt 2010/05/03 changed to records_size.to_f to avoid -Infinity Errno::ERANGE exceptions ## which would happen for example Math.log(1 / 20) == -Infinity but Math.log(1.0 / 20) == -2.99573227355399 out[r_id] = pos.size * Math.log(records_size.to_f / matching_records_size) end out end |