Class: ActsAsIndexed::SearchAtom

Inherits:
Object
  • Object
show all
Defined in:
lib/acts_as_indexed/search_atom.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(records = ActiveSupport::OrderedHash.new) ⇒ SearchAtom

Returns a new instance of SearchAtom.



14
15
16
# File 'lib/acts_as_indexed/search_atom.rb', line 14

# Builds an atom around the given record store.
#
# @param records hash-like store mapping a record ID to that record's array
#   of positions. When omitted, a new empty ActiveSupport::OrderedHash is
#   created for this instance.
def initialize(records=ActiveSupport::OrderedHash.new)
  # The passed object is stored as-is; no copy is made.
  @records = records
end

Instance Attribute Details

#records ⇒ Object (readonly)

Contains a hash of records: { 'record_id' => [pos1, pos2, pos] }. Weighting (see www.perlmonks.com/index.pl?node_id=27509): W(T, D) = tf(T, D) * log(DN / df(T)), i.e. weighting = frequency_in_this_record * log(total_number_of_records / number_of_matching_records).



12
13
14
# File 'lib/acts_as_indexed/search_atom.rb', line 12

# Read accessor for the record store held by this atom.
#
# @return the object held in @records. The stored object itself is returned,
#   not a copy, so mutating it mutates this atom.
def records
  @records
end

Instance Method Details

#+(other) ⇒ Object

Creates a new SearchAtom with the combined records from self and other



50
51
52
53
54
# File 'lib/acts_as_indexed/search_atom.rb', line 50

# Creates a new SearchAtom with the combined records from self and other.
#
# A record present in only one atom is carried over unchanged. For a record
# present in both, the two position arrays are concatenated into a new array
# with self's positions first. Neither operand's record hash is modified.
# Position arrays that are carried over unchanged are shared with the source
# atom rather than copied.
#
# @param other [SearchAtom] the atom whose records are merged in
# @return [SearchAtom] a new atom holding the merged records
def +(other)
  # Hash#merge is non-destructive, so this also works when @records is
  # frozen. `clone.merge!` would raise there, because clone copies the
  # frozen state onto the copy it then tries to mutate.
  merged = @records.merge(other.records) do |_record_id, own_positions, other_positions|
    own_positions + other_positions
  end
  SearchAtom.new(merged)
end

#-(other) ⇒ Object

Creates a new SearchAtom with records in other removed from self.



57
58
59
60
# File 'lib/acts_as_indexed/search_atom.rb', line 57

# Creates a new SearchAtom with records in other removed from self.
#
# A record is dropped when its ID appears in other's records; the positions
# stored under that ID in either atom are not compared. The receiver's record
# hash is left untouched because Hash#reject builds a new hash.
#
# @param other [SearchAtom] the atom whose record IDs are excluded
# @return [SearchAtom] a new atom holding the remaining records
def -(other)
  remaining = @records.reject do |record_id, _positions|
    other.records.include?(record_id)
  end
  SearchAtom.new(remaining)
end

#add_position(record_id, pos) ⇒ Object

Adds pos to the array of positions for record_id.



29
30
31
32
# File 'lib/acts_as_indexed/search_atom.rb', line 29

# Adds pos to the array of positions for record_id.
#
# The record is registered first (via add_record) if this atom does not hold
# it yet, so the append always has an array to land in.
#
# @param record_id the key of the record the position belongs to
# @param pos the position to append
# @return [Array] the record's positions array after the append
def add_position(record_id, pos)
  add_record(record_id)
  position_list = @records[record_id]
  position_list << pos
end

#add_record(record_id) ⇒ Object

Adds record_id to the stored records.



24
25
26
# File 'lib/acts_as_indexed/search_atom.rb', line 24

# Adds record_id to the stored records with an empty positions array.
#
# A record that is already present is left untouched, so its existing
# positions are never reset.
#
# @param record_id the key of the record to register
# @return [Array, nil] the new empty array when the record was added, nil
#   when it was already present
def add_record(record_id)
  return if include_record?(record_id)

  @records[record_id] = []
end

#include_record?(record_id) ⇒ Boolean

Returns true if the given record is present.

Returns:

  • (Boolean)


19
20
21
# File 'lib/acts_as_indexed/search_atom.rb', line 19

# Returns true if the given record is present.
#
# Presence is decided by the key alone; the positions stored under it are
# not inspected.
#
# @param record_id the key to look for
# @return [Boolean] whether @records has record_id as a key
def include_record?(record_id)
  @records.key?(record_id)
end

#positions(record_id) ⇒ Object

Returns an array of positions for record_id stored in this Atom.



40
41
42
# File 'lib/acts_as_indexed/search_atom.rb', line 40

# Looks up the positions stored in this atom for a single record.
#
# @param record_id the key of the record to look up
# @return whatever @records[record_id] yields: the stored positions array,
#   or the store's default for a missing key (nil for a plain hash). The
#   stored array itself is returned, not a copy.
def positions(record_id)
  @records[record_id]
end

#preceded_by(former) ⇒ Object

Returns an atom containing the records and positions of self preceded by former. For example, in “former latter” or “big dog”, “big” is the former and “dog” is the latter.



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/acts_as_indexed/search_atom.rb', line 64

# Returns an atom containing the records and positions of self that are
# immediately preceded by former, e.g. for the phrase "big dog" self is the
# atom for "dog" and former is the atom for "big".
#
# Only records held by both atoms are considered. A position of self is kept
# when former holds the position directly before it (position - 1) in the
# same record. Records appear in the result in the order of
# former.record_ids, and only if at least one of their positions matched.
#
# @param former [SearchAtom] the atom for the term expected directly before
#   this one
# @return [SearchAtom] a new atom holding the matching records and positions
def preceded_by(former)
  matches = SearchAtom.new

  former.record_ids.each do |record_id|
    own_positions = @records[record_id]
    # Skip records that only the former atom knows about.
    next unless own_positions

    own_positions.each do |position|
      next unless former.include_position?(record_id, position - 1)

      # add_position registers the record on first use.
      matches.add_position(record_id, position)
    end
  end

  matches
end

#record_ids ⇒ Object

Returns all record IDs stored in this Atom.



35
36
37
# File 'lib/acts_as_indexed/search_atom.rb', line 35

# Lists the IDs of every record stored in this atom.
#
# @return the keys of @records (an Array when @records is a Hash)
def record_ids
  @records.keys
end

#remove_record(record_id) ⇒ Object

Removes record_id from this Atom.



45
46
47
# File 'lib/acts_as_indexed/search_atom.rb', line 45

# Removes record_id, together with its stored positions, from this atom.
#
# @param record_id the key of the record to remove
# @return the result of @records.delete(record_id); for a Hash that is the
#   removed positions array, or nil when the record was not present
def remove_record(record_id)
  @records.delete(record_id)
end

#weightings(records_size) ⇒ Object

Returns a hash of record_ids and weightings for each record in the atom.



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/acts_as_indexed/search_atom.rb', line 88

# Returns a hash of record_ids and weightings for each record in the atom.
#
# The weighting of a record is
#
#   frequency_in_this_record * log(records_size / matching_records_size)
#
# where frequency_in_this_record is the number of positions stored for the
# record and matching_records_size is the number of records in this atom
# (reduced by one when every record matches, see below).
#
# Special cases:
# * records_size < 1: every weighting is 0.0.
# * records_size == 1 and this atom holds exactly one record: the weighting
#   is the plain frequency, because the adjusted divisor would be zero.
#
# @param records_size [Integer] total number of records in the index
# @return [ActiveSupport::OrderedHash] record_id => Float weighting, in the
#   iteration order of the stored records
def weightings(records_size)
  out = ActiveSupport::OrderedHash.new

  ## phurni 2012-09-21 when records_size is exactly the @records.size (all records are matches), the Math.log would
  ## return 0 which means the frequency (pos.size) will have no effect. Cheat to make it like the matching
  ## record is one less, so that we still can weight on frequency.
  matching_records_size = (records_size == @records.size ? @records.size - 1 : @records.size)

  @records.each do |r_id, pos|
    # Fixes a bug when the records_size is zero. i.e. The only record
    # containing the word has been deleted.
    if records_size < 1
      out[r_id] = 0.0
      next
    end

    # With a single record that also matches, the "one less" cheat above
    # leaves a divisor of zero; Math.log(1.0 / 0) is Infinity (and
    # 0 * Infinity is NaN). Fall back to weighting on frequency alone so the
    # result stays finite.
    if matching_records_size < 1
      out[r_id] = pos.size.to_f
      next
    end

    # weighting = frequency * log (records.size / records_with_atom)
    ## parndt 2010/05/03 changed to records_size.to_f to avoid -Infinity Errno::ERANGE exceptions
    ## which would happen for example Math.log(1 / 20) == -Infinity but Math.log(1.0 / 20) == -2.99573227355399
    out[r_id] = pos.size * Math.log(records_size.to_f / matching_records_size)
  end

  out
end