Class: Dictionary::KL

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/bow/dictionary.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ KL

Returns a new instance of KL.



121
122
123
124
# File 'lib/rbbt/bow/dictionary.rb', line 121

# Builds the two underlying dictionaries, one for each class of documents.
#
# @param options [Hash] passed unchanged to both Dictionary::TF_IDF
#   constructors
def initialize(options = {})
  # The block runs twice, so the positive dictionary is constructed first
  # and the two dictionaries are separate instances.
  @pos_dict, @neg_dict = Array.new(2) { Dictionary::TF_IDF.new(options) }
end

Instance Attribute Details

#neg_dict ⇒ Object (readonly)

Returns the value of attribute neg_dict.



119
120
121
# File 'lib/rbbt/bow/dictionary.rb', line 119

# Reader for the @neg_dict instance variable, i.e. the dictionary that #add
# feeds when the class marker is not :+ or '+'.
def neg_dict
  @neg_dict
end

#pos_dict ⇒ Object (readonly)

Returns the value of attribute pos_dict.



119
120
121
# File 'lib/rbbt/bow/dictionary.rb', line 119

# Reader for the @pos_dict instance variable, i.e. the dictionary that #add
# feeds when the class marker is :+ or '+'.
def pos_dict
  @pos_dict
end

Instance Method Details

#add(terms, c) ⇒ Object



130
131
132
133
# File 'lib/rbbt/bow/dictionary.rb', line 130

# Records the terms of one document in the dictionary matching its class.
#
# @param terms [Object] forwarded unchanged to the chosen dictionary's #add
# @param c [Object] class marker; :+ or '+' selects the positive
#   dictionary, anything else selects the negative one
# @return [Object] whatever the chosen dictionary's #add returns
def add(terms, c)
  positive = (c == :+) || (c == '+')
  target = positive ? @pos_dict : @neg_dict
  target.add(terms)
end

#best(options = {}) ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/rbbt/bow/dictionary.rb', line 154

# Scores terms by comparing their document frequency in the positive and
# negative dictionaries and returns the scores, optionally keeping only the
# top-scoring terms.
#
# A term is scored only if its document frequency lies within [low, high]
# in at least one of the two dictionaries. The score is
#   pos * log(pos / neg) + neg * log(neg / pos)
# where pos and neg are the term's document frequencies.
#
# NOTE(review): assumes both #df results yield a number for every term
# returned by #terms -- confirm against Dictionary::TF_IDF#df.
#
# @param options [Hash]
# @option options [Numeric] :low (0) lower bound on document frequency
# @option options [Numeric] :high (1) upper bound on document frequency
# @option options [Integer] :limit (nil) when given, keep only this many
#   highest-scoring terms
# @return [Hash] term => score; with :limit the pairs are inserted from
#   highest to lowest score
def best(options = {})
  high, low, limit = {
    :low  => 0,
    :high => 1,
  }.merge(options).values_at(:high, :low, :limit)

  pos_df = @pos_dict.df
  neg_df = @neg_dict.df

  within_bounds = lambda { |freq| freq >= low && freq <= high }

  scores = {}
  terms.each do |term|
    pos = pos_df[term]
    neg = neg_df[term]
    next unless within_bounds.call(pos) || within_bounds.call(neg)

    # Nudge exact 0 and 1 to near-boundary values; a 0 would otherwise end
    # up as a divisor or as the argument of log.
    pos = 0.000001 if pos == 0
    pos = 0.999999 if pos == 1
    neg = 0.000001 if neg == 0
    neg = 0.999999 if neg == 1

    scores[term] = pos * Math::log(pos / neg) + neg * Math::log(neg / pos)
  end

  return scores unless limit

  ranked = scores.sort { |a, b| b[1] <=> a[1] }
  # first(limit) keeps exactly `limit` pairs (the previous slice(0, limit-1)
  # kept one fewer). Hash[pairs] builds the hash from the pairs directly,
  # without flattening and splatting them.
  Hash[ranked.first(limit)]
end

#kl ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/rbbt/bow/dictionary.rb', line 135

# Computes, for every term returned by #terms, the score
#   pos * log(pos / neg) + neg * log(neg / pos)
# where pos and neg are the term's document frequencies in the positive and
# negative dictionaries.
#
# @return [Hash] term => score
def kl
  pos_freqs = @pos_dict.df
  neg_freqs = @neg_dict.df

  terms.inject({}) do |scores, term|
    # Exact 0 and 1 are replaced by near-boundary values before the ratios
    # and logarithms are taken.
    pos, neg = [pos_freqs[term], neg_freqs[term]].map do |freq|
      if freq == 0
        0.000001
      elsif freq == 1
        0.999999
      else
        freq
      end
    end

    scores[term] = pos * Math::log(pos / neg) + neg * Math::log(neg / pos)
    scores
  end
end

#terms ⇒ Object



126
127
128
# File 'lib/rbbt/bow/dictionary.rb', line 126

# Lists every term present in either dictionary, without duplicates.
# Terms of the positive dictionary come first, followed by those that occur
# only in the negative dictionary.
#
# @return [Array] the distinct keys of both dictionaries' #terms
def terms
  positive_terms = pos_dict.terms.keys
  negative_terms = neg_dict.terms.keys
  positive_terms | negative_terms
end

#weights(options = {}) ⇒ Object



186
187
188
# File 'lib/rbbt/bow/dictionary.rb', line 186

# Delegates to #best with the same options and returns its result
# unchanged.
#
# @param options [Hash] forwarded as-is to #best
# @return [Object] whatever #best returns for these options
def weights(options = {})
  best(options)
end