Module: Clusterer::Similarity

Defined in:
lib/similarity.rb

Class Method Summary collapse

Class Method Details

.vector_similarity(cluster1, cluster2) ⇒ Object

Computes the cosine similarity between two clusters (or two points), each given as a hash mapping String keys to numeric weights.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/similarity.rb', line 8

# Computes the cosine similarity between two sparse vectors (clusters or
# points) represented as hashes whose String keys map to numeric weights.
#
# Only String-keyed entries take part in the computation; entries under any
# other kind of key (such as the +:total+ cache described below) are ignored
# for BOTH arguments, so the result does not depend on argument order.
#
# Side effect: the squared norm of each vector is cached in the hash itself
# under the +:total+ key and reused on later calls. A vector whose squared
# norm is zero is cached as 1 so the final division is always defined.
# The cache is not refreshed if the hash is modified afterwards.
#
# @param cluster1 [Hash] first vector; gains/keeps a +:total+ entry
# @param cluster2 [Hash] second vector; gains/keeps a +:total+ entry
# @return [Float] dot product of the two vectors divided by the product of
#   their Euclidean norms
def Similarity.vector_similarity(cluster1, cluster2)
  # Squared Euclidean norm over the String-keyed entries of a vector.
  squared_norm = lambda do |vector|
    sum = 0
    vector.each { |key, value| sum += value * value if key.is_a?(String) }
    sum
  end

  # Compute each norm at most once per hash, then normalise a zero norm to 1
  # (this also repairs a zero that was cached before the call).
  [cluster1, cluster2].each do |vector|
    vector[:total] ||= squared_norm.call(vector)
    vector[:total] = 1 if vector[:total] == 0
  end

  # Dot product: keys missing from cluster2 contribute nothing.
  dot_product = 0
  cluster1.each do |key, value|
    next unless key.is_a?(String)

    dot_product += value * (cluster2[key] || 0)
  end

  # Math.sqrt returns a Float, so this is always floating-point division.
  dot_product / Math.sqrt(cluster1[:total] * cluster2[:total])
end