Module: TFIDF

Defined in:
lib/zipf/tfidf.rb

Class Method Summary collapse

Class Method Details

.idf(list_of_hashes) ⇒ Object

returns idf value for each word in a vocabulary



24
25
26
27
28
29
30
31
32
33
# File 'lib/zipf/tfidf.rb', line 24

# Computes log(n / df) for every distinct word found in the values of
# +list_of_hashes+.
#
# Despite its name, the argument must respond to +values+ and +size+
# (i.e. behave like a Hash); its values are flattened into one word list.
# n is the number of entries in the argument, df is the number of times a
# word occurs in the flattened list.
#
# NOTE(review): presumably each value is the word list of one document.
# A word repeated inside a single value is counted once per occurrence,
# so df may exceed the true document frequency -- confirm with callers.
#
# Returns a new Hash word => idf, keyed in first-occurrence order.
def TFIDF::idf list_of_hashes
  # Flatten once instead of once per vocabulary word.
  words = list_of_hashes.values.flatten
  n = list_of_hashes.size.to_f
  # Single counting pass; keys are grouped by hash/eql?, like Array#uniq.
  occurrences = Hash.new(0)
  words.each { |w| occurrences[w] += 1 }
  idf = {}
  occurrences.each_pair { |w, df|
    idf[w] = Math.log(n/df)
  }
  return idf
end

.ntf(hash, a = 0.4) ⇒ Object

Smooths the raw frequencies returned by tf() in place; a is the smoothing term.



16
17
18
19
20
21
# File 'lib/zipf/tfidf.rb', line 16

# Rescales every value of +hash+ in place to a + (1 - a) * (value / max),
# where max is the largest value currently in the hash, taken as a Float.
# Returns the same hash object that was passed in.
def TFIDF::ntf hash, a=0.4
  peak = hash.values.max.to_f
  hash.keys.each do |term|
    hash[term] = a + (1-a)*(hash[term]/peak)
  end
  hash
end

.tf(array, stopwords = []) ⇒ Object

Returns a hash mapping each distinct element of an array-like object to its raw frequency.



5
6
7
8
9
10
11
12
# File 'lib/zipf/tfidf.rb', line 5

# Counts how often each element occurs in +array+.
#
# array     - collection to count; it is traversed once with +each+.
# stopwords - elements to leave out of the result (checked with +include?+).
#
# Returns a Hash element => count, where counts are Floats and keys appear
# in first-occurrence order. Looking up a key that is not present yields 0.
def TFIDF::tf array, stopwords=[]
  v = Hash.new(0)
  # One counting pass instead of one Array#count scan per distinct element;
  # 0 + 1.0 makes every stored count a Float.
  array.each { |i| v[i] += 1.0 }
  # Drop stop words afterwards so each distinct element is tested only once.
  v.delete_if { |i, _| stopwords.include? i }
  return v
end