Class: Clusterer::InverseDocumentFrequency

Inherits:
Hash
  • Object
show all
Defined in:
lib/clusterer/inverse_document_frequency.rb

Overview

InverseDocumentFrequency maintains a count of the total number of documents and, with the help of helper classes, the number of documents in which each term has been seen. It also calculates the normalizing factor, the formula for which is Math.log(total number of documents / number of documents containing the term).

Instance Method Summary collapse

Constructor Details

#initialize(options = { }) ⇒ InverseDocumentFrequency

Returns a new instance of InverseDocumentFrequency.



64
65
66
67
68
# File 'lib/clusterer/inverse_document_frequency.rb', line 64

# Sets up the counters and an empty cache of normalizing factors.
#
# @param options [Hash] optional pre-built collaborators
# @option options [Object] :terms_count per-term counter; a new TermsCount
#   is created when the key is missing or falsy
# @option options [Object] :documents_count document counter; a new
#   DocumentsCount is created when the key is missing or falsy
def initialize(options = {})
  # Cache of normalizing factors, keyed by term.
  @nf = {}
  @terms_count = options[:terms_count] || TermsCount.new
  @documents_count = options[:documents_count] || DocumentsCount.new
end

Instance Method Details

#<<(term) ⇒ Object



74
75
76
# File 'lib/clusterer/inverse_document_frequency.rb', line 74

# Records one more occurrence of +term+ in the terms counter.
# Nil and empty terms are ignored.
#
# @param term [#empty?, nil] the term to count
# @return [Object, nil] whatever +increment_count+ returns, or nil when the
#   term was skipped
def <<(term)
  return if term.nil? || term.empty?

  @terms_count.increment_count(term)
end

#[](term) ⇒ Object



78
79
80
# File 'lib/clusterer/inverse_document_frequency.rb', line 78

# Returns the normalizing factor for +term+, computing and caching it on
# first access.
#
# The factor is Math.log(documents count / term count) when the term has a
# count and more than one document has been counted; otherwise it is 1.0.
#
# @param term [Object] the term to look up
# @return [Float] the cached or freshly computed normalizing factor
def [](term)
  @nf[term] ||= begin
    occurrences = @terms_count[term]
    if occurrences && @documents_count.value > 1
      # to_f forces floating-point division before taking the logarithm.
      Math.log(@documents_count.value / occurrences.to_f)
    else
      1.0
    end
  end
end

#clean_cached_normalizing_factor ⇒ Object



60
61
62
# File 'lib/clusterer/inverse_document_frequency.rb', line 60

# Empties the cache of normalizing factors so that later lookups
# recompute them from the current counts.
#
# @return [Object] the result of calling +clear+ on the cache
def clean_cached_normalizing_factor
  @nf.clear
end

#documents_count ⇒ Object



56
57
58
# File 'lib/clusterer/inverse_document_frequency.rb', line 56

# Reports the current value held by the documents counter.
#
# @return [Object] the counter's +value+
def documents_count
  @documents_count.value
end

#increment_documents_count ⇒ Object



70
71
72
# File 'lib/clusterer/inverse_document_frequency.rb', line 70

# Advances the documents counter by delegating to its +increment+ method.
#
# @return [Object] whatever the counter's +increment+ returns
def increment_documents_count
  @documents_count.increment
end