Class: Levenshtein

Inherits:
Object
  • Object
show all
Defined in:
lib/levenshtein_ruby.rb,
ext/levenshtein_ruby/levenshtein_ruby.c

Instance Method Summary collapse

Instance Method Details

#distance(s_word1, s_word2) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'ext/levenshtein_ruby/levenshtein_ruby.c', line 7

/*
 * Levenshtein#distance(s_word1, s_word2) -> Integer
 *
 * Returns the Levenshtein edit distance between the two arguments: the
 * minimum number of single-byte deletions, insertions and substitutions
 * needed to turn s_word1 into s_word2.
 *
 * Both arguments are coerced with StringValue, so a TypeError is raised for
 * objects that cannot be converted to String. The comparison is done byte by
 * byte over the full RSTRING_LEN of each string, so embedded NUL bytes are
 * treated like any other byte.
 *
 * Only one row of the dynamic-programming table is kept, on the heap, so the
 * working memory is proportional to the length of s_word2 rather than to the
 * product of both lengths on the C stack.
 */
VALUE distance_func(VALUE self, VALUE s_word1, VALUE s_word2)
{
    const char *word1;
    const char *word2;
    long len1;
    long len2;
    long i;
    long j;
    long result;
    long *row;

    /* Coerce both arguments before taking any pointer: the conversion may
     * run arbitrary Ruby code (to_str), which must not happen while we are
     * already holding a raw pointer into the other string. */
    StringValue(s_word1);
    StringValue(s_word2);

    word1 = RSTRING_PTR(s_word1);
    len1 = RSTRING_LEN(s_word1);
    word2 = RSTRING_PTR(s_word2);
    len2 = RSTRING_LEN(s_word2);

    /* row[j] holds the distance between the first i bytes of word1 and the
     * first j bytes of word2. ALLOC_N raises NoMemoryError on failure. */
    row = ALLOC_N(long, (size_t)len2 + 1);
    for (j = 0; j <= len2; j++) {
        row[j] = j;
    }

    for (i = 1; i <= len1; i++) {
        char c1 = word1[i - 1];
        long diagonal = row[0];     /* value of cell (i-1, j-1) */

        row[0] = i;
        for (j = 1; j <= len2; j++) {
            long above = row[j];    /* value of cell (i-1, j), not yet overwritten */

            if (c1 == word2[j - 1]) {
                row[j] = diagonal;
            }
            else {
                long minimum = above;           /* delete */

                if (row[j - 1] < minimum) {     /* insert */
                    minimum = row[j - 1];
                }
                if (diagonal < minimum) {       /* substitute */
                    minimum = diagonal;
                }
                row[j] = minimum + 1;
            }
            diagonal = above;
        }
    }

    /* Release the buffer before building the return value so that nothing
     * that could raise runs while the allocation is still outstanding. */
    result = row[len2];
    xfree(row);

    /* Keep the String objects alive for as long as their buffers were read. */
    RB_GC_GUARD(s_word1);
    RB_GC_GUARD(s_word2);

    return LONG2NUM(result);
}

#normalized_distance(a1, a2, threshold = nil, options = {}) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/levenshtein_ruby.rb', line 5

# Levenshtein distance between +a1+ and +a2+, scaled by the size of the
# longer input.
#
# The threshold cut-off is applied here because the +distance+ this method
# relies on takes exactly two arguments and cannot receive a cut-off itself.
#
# @param a1 [#size] first sequence, passed on to +distance+
# @param a2 [#size] second sequence, passed on to +distance+
# @param threshold [Numeric, nil] optional upper bound on the normalized
#   distance; ignored when either input is empty
# @param options [Hash] accepted for interface compatibility; not used
# @return [Float, nil] 0.0 when both inputs are empty, 1.0 when exactly one
#   is empty, otherwise distance / size; +nil+ when a threshold is given and
#   the distance reaches <tt>(threshold * size).to_i + 1</tt>
def normalized_distance(a1, a2, threshold=nil, options={})
  size = [a1.size, a2.size].max

  # Both empty: the inputs are identical.
  return 0.0 if size.zero?
  # Exactly one empty: every element of the other must be inserted.
  return 1.0 if a1.size.zero? || a2.size.zero?

  d = distance(a1, a2)

  # Smallest whole distance whose normalized value exceeds the threshold.
  return nil if threshold && d >= (threshold * size).to_i + 1

  d.to_f / size
end