Class: TfIdf

Inherits:
Object
  • Object
show all
Defined in:
lib/tf_idf.rb

Instance Method Summary collapse

Constructor Details

#initialize(data, n = 1) ⇒ TfIdf

n is the size of the n-grams to use for the data (see en.wikipedia.org/wiki/N-gram)



6
7
8
9
# File 'lib/tf_idf.rb', line 6

# Stores the input data and the n-gram size on the instance.
#
# @param data the input to analyse; kept as-is in @data
#   (presumably a collection of documents -- confirm against callers)
# @param n [Integer] the n-gram size; defaults to 1
def initialize(data, n = 1)
  @data, @n = data, n
end

Instance Method Details

#idf ⇒ Object



15
16
17
# File 'lib/tf_idf.rb', line 15

# Returns the inverse document frequencies, computing them on first use.
#
# The result of calculate_inverse_document_frequency is cached in @idf and
# reused for as long as the cached value is truthy.
#
# @return [Object] whatever calculate_inverse_document_frequency returns
def idf
  return @idf if @idf

  @idf = calculate_inverse_document_frequency
end

#tf ⇒ Object



11
12
13
# File 'lib/tf_idf.rb', line 11

# Returns the term frequencies, computing them on first use.
#
# The result of calculate_term_frequencies is cached in @tf and reused for
# as long as the cached value is truthy.
#
# @return [Object] whatever calculate_term_frequencies returns
def tf
  return @tf if @tf

  @tf = calculate_term_frequencies
end

#tf_idf ⇒ Object

Calculates the tf-idf score of every term in every document by multiplying the term's tf score by its idf score.



20
21
22
23
24
25
26
27
28
29
30
# File 'lib/tf_idf.rb', line 20

# Computes the tf-idf score of every term in every document by multiplying
# each term's tf score by that term's idf score.
#
# A new collection is built on every call. The per-document hashes returned
# by #tf are never written to, so the memoized term frequencies stay intact
# and repeated calls give the same result.
#
# @return [Array] one entry per document in #tf, each mapping a term to
#   tf_score * idf[term]
def tf_idf
  tf.map do |document|
    # Write into a copy: the previous `tf.clone` was shallow, so assigning
    # into its elements overwrote the hashes cached behind #tf. Using dup
    # (rather than a bare {}) keeps the hash's class and default value.
    document.each_with_object(document.dup) do |(term, tf_score), weighted|
      weighted[term] = tf_score * idf[term]
    end
  end
end

#total_documents ⇒ Object



32
33
34
# File 'lib/tf_idf.rb', line 32

# Returns the size of @data converted with to_f.
#
# @return [Float] @data.size as a float
def total_documents
  document_count = @data.size
  document_count.to_f
end