Class: SimilarityTree::SimilarityMatrix

Inherits:
Object
  • Object
show all
Defined in:
lib/similarity_tree/similarity_matrix.rb

Overview

Table of the diff/similarity scores between different text documents

Instance Method Summary collapse

Constructor Details

#initialize(sources, options = {}) ⇒ SimilarityMatrix

Initialize a matrix for a set of documents



11
12
13
14
15
16
17
18
# File 'lib/similarity_tree/similarity_matrix.rb', line 11

# Sets up a matrix for the given documents without computing any scores yet.
#
# sources - the collection of documents to compare (stored as-is).
# options - Hash of overrides merged on top of default_options; keys given
#           here win over the defaults.
def initialize(sources, options = {})
  @sources = sources
  @config = default_options.merge(options)

  # Bookkeeping state; build_tree later looks sources up by node id here.
  @source_index = {}
  @id = -1 # NOTE(review): presumably the last id handed out — confirm against the rest of the class

  # Stays nil until the scores are calculated (build_tree triggers that lazily).
  @matrix = nil
end

Instance Method Details

#build_tree(root_id, score_threshold = 0) ⇒ Object



32
33
34
35
36
37
38
39
40
# File 'lib/similarity_tree/similarity_matrix.rb', line 32

# Builds a SimilarityTree rooted at root_id from the score matrix.
#
# root_id         - id passed through to SimilarityTree.new as the root.
# score_threshold - passed through to SimilarityTree.new (defaults to 0).
#
# Returns the built tree, with every node's content set from @source_index.
def build_tree(root_id, score_threshold = 0)
  # Compute the matrix on first use only; later calls reuse the stored one.
  @matrix = calculate if @matrix.nil?

  SimilarityTree.new(root_id, @matrix, score_threshold).build.tap do |tree|
    # Attach to each node whatever @source_index holds for that node's id.
    tree.each_node do |node|
      node.content = @source_index[node.id]
    end
  end
end

#calculate ⇒ Object

Calculates the scores and returns the results as an array of arrays; an optional block is run on each comparison, which can be used to drive a progress bar



22
23
24
25
26
27
28
29
30
# File 'lib/similarity_tree/similarity_matrix.rb', line 22

# Computes the score matrix with the strategy selected by
# @config[:calculation_method] (:tf_idf or :diff), stores it in @matrix and
# returns it.
#
# An optional block is forwarded to the selected strategy helper so it can be
# run per comparison (e.g. to advance a progress bar). Without the explicit
# forward the block would stop at this method and never reach the helper.
# NOTE(review): the helpers are defined elsewhere — confirm they yield.
#
# Raises RuntimeError ("Unknown calculation type") for any other setting;
# @matrix is left untouched in that case.
def calculate(&block)
  @matrix =
    case @config[:calculation_method]
    when :tf_idf then calculate_with_tf_idf(&block)
    when :diff   then calculate_with_diff(&block)
    else raise "Unknown calculation type"
    end
end