Class: FuzzyMatch::Score::PureRuby

Inherits:
FuzzyMatch::Score show all
Defined in:
lib/fuzzy_match/score/pure_ruby.rb

Constant Summary collapse

SPACE =
' '

Instance Attribute Summary

Attributes inherited from FuzzyMatch::Score

#str1, #str2

Instance Method Summary collapse

Methods inherited from FuzzyMatch::Score

#<=>, #initialize

Constructor Details

This class inherits a constructor from FuzzyMatch::Score

Instance Method Details

#dices_coefficient_similar ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/fuzzy_match/score/pure_ruby.rb', line 12

# Dice's coefficient of str1 and str2, computed over their adjacent
# two-character pairs (bigrams). Pairs containing a space are ignored.
#
# The result is 2 * (matched pairs) / (pairs in str1 + pairs in str2); a
# pair in str2 can be matched at most once. Identical strings score 1.0.
# When neither string yields any usable pair (single characters, empty
# strings, or only space-separated single characters) the score is 0.0
# rather than the NaN that a 0/0 division would produce.
#
# The value is memoized in @dices_coefficient_similar.
#
# @return [Float] similarity between 0.0 and 1.0
def dices_coefficient_similar
  @dices_coefficient_similar ||= begin
    if str1 == str2
      1.0
    else
      # All overlapping two-character slices of a string, minus those
      # that contain a space.
      bigrams = lambda do |str|
        (0..str.length - 2).map { |i| str[i, 2] }.reject { |pair| pair.include?(SPACE) }
      end
      pairs1 = bigrams.call(str1)
      pairs2 = bigrams.call(str2)
      union = pairs1.size + pairs2.size

      if union.zero?
        # Nothing to compare (this also covers two one-character strings);
        # avoid 0.0 / 0, which is NaN.
        0.0
      else
        intersection = 0
        pairs1.each do |pair|
          match_at = pairs2.index(pair)
          next unless match_at

          intersection += 1
          # Consume the matched pair so it cannot be counted twice.
          pairs2.delete_at(match_at)
        end
        (2.0 * intersection) / union
      end
    end
  end
end

#inspect ⇒ Object



7
8
9
# File 'lib/fuzzy_match/score/pure_ruby.rb', line 7

# Human-readable summary of this score: both compared strings (in their
# own #inspect form) followed by the two similarity values.
#
# @return [String]
def inspect
  fields = [
    "str1=#{str1.inspect}",
    "str2=#{str2.inspect}",
    "dices_coefficient_similar=#{dices_coefficient_similar}",
    "levenshtein_similar=#{levenshtein_similar}"
  ]
  "#<FuzzyMatch::Score::PureRuby: #{fields.join(' ')}>"
end

#levenshtein_similar ⇒ Object

Extracted/adapted from the text gem, version 1.0.2 (lib/text/levenshtein.rb); normalization added for UTF-8 strings.



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/fuzzy_match/score/pure_ruby.rb', line 48

# Levenshtein similarity of str1 and str2, normalized to a Float:
# 1.0 - (edit distance / length of the longer string).
#
# Strings are compared as arrays of integers: unpacked with 'U*' when
# utf8? is truthy, otherwise with 'C*'. If either unpacked string is
# empty the result is 0.0.
#
# The value is memoized in @levenshtein_similar.
#
# @return [Float]
def levenshtein_similar
  @levenshtein_similar ||= begin
    unpack_rule = utf8? ? 'U*' : 'C*'
    source = str1.unpack(unpack_rule)
    target = str2.unpack(unpack_rule)
    source_size = source.length
    target_size = target.length

    if source_size.zero? || target_size.zero?
      0.0
    else
      # Single rolling row of the edit-distance table; row[j] holds the
      # distance for the previous source element until it is overwritten.
      row = (0..target_size).to_a
      distance = nil
      source.each_with_index do |source_unit, i|
        left = i + 1
        target.each_with_index do |target_unit, j|
          substitution_cost = source_unit == target_unit ? 0 : 1
          distance = [
            row[j + 1] + 1,            # insertion
            left + 1,                  # deletion
            row[j] + substitution_cost # substitution
          ].min
          row[j] = left
          left = distance
        end
        row[target_size] = distance
      end
      # normalization logic from https://github.com/flori/amatch/blob/master/ext/amatch_ext.c#L301
      # if (b_len > a_len) {
      #     result = rb_float_new(1.0 - ((double) v[p][b_len]) / b_len);
      # } else {
      #     result = rb_float_new(1.0 - ((double) v[p][b_len]) / a_len);
      # }
      1.0 - distance.to_f / [source_size, target_size].max
    end
  end
end