Module: MiniLevenshtein

Defined in:
lib/mini-levenshtein.rb,
lib/mini-levenshtein/version.rb,
ext/mini_levenshtein/mini_levenshtein.c

Constant Summary collapse

VERSION =
'0.1.2'

Class Method Summary collapse

Class Method Details

.edit_distance(string1, string2) ⇒ Integer Also known as: distance

Compute the absolute Levenshtein distance between two strings.

Examples:

It’s hard to spell Levenshtein correctly:

edit_distance('Levenshtein', 'Lenvinsten')  # => 4
edit_distance('Levenshtein', 'Levensthein') # => 2
edit_distance('Levenshtein', 'Levenshten')  # => 1
edit_distance('Levenshtein', 'Levenshtein') # => 0
"Yeah, we've managed it at last."

Parameters:

  • string1 (String)
  • string2 (String)

Returns:

  • (Integer)


46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'ext/mini_levenshtein/mini_levenshtein.c', line 46

/*
 * MiniLevenshtein.edit_distance(string1, string2) -> Integer
 *
 * Ruby binding that returns the edit distance between two Strings as
 * computed by lev_edit_distance() with xcost = 0.
 *
 * Both arguments must be Ruby Strings; Check_Type raises TypeError otherwise.
 * The lengths handed to lev_edit_distance() come from RSTRING_LEN, so the
 * comparison is made over the strings' raw buffers.
 *
 * NOTE(review): lev_edit_distance() is not visible here; if it reports
 * failure through a sentinel return value (e.g. on allocation failure),
 * that value is passed through to Ruby unchanged -- confirm and handle.
 */
VALUE rb_edit_distance(VALUE self, VALUE string1, VALUE string2)
{
  Check_Type(string1, T_STRING);
  Check_Type(string2, T_STRING);

  size_t len1 = RSTRING_LEN(string1);
  size_t len2 = RSTRING_LEN(string2);

  /* StringValuePtr yields a char *; convert to the lev_byte pointer type
   * explicitly instead of relying on an implicit pointer conversion. */
  const lev_byte *str1 = (const lev_byte *)StringValuePtr(string1);
  const lev_byte *str2 = (const lev_byte *)StringValuePtr(string2);

  long distance = lev_edit_distance(len1, str1, len2, str2, 0);

  /* distance is a long: box it with LONG2NUM so it is not narrowed to int. */
  return LONG2NUM(distance);
}

.lev_edit_distance(s1, s2, xcost) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'ext/mini_levenshtein/mini_levenshtein.c', line 12

/*
 * MiniLevenshtein.lev_edit_distance(s1, s2, xcost) -> Integer
 *
 * Thin Ruby binding around lev_edit_distance() that lets the caller pass
 * the xcost argument through directly.
 *
 * s1 and s2 must be Ruby Strings and xcost must be a Fixnum; Check_Type
 * raises TypeError otherwise. The lengths handed to lev_edit_distance()
 * come from RSTRING_LEN.
 *
 * NOTE(review): the exact meaning of xcost is defined by lev_edit_distance(),
 * which is not visible here -- presumably nonzero makes a substitution cost 2
 * instead of 1; confirm against its definition.
 */
VALUE rb_lev_edit_distance(VALUE self, VALUE s1, VALUE s2, VALUE xcost)
{
  Check_Type(s1, T_STRING);
  Check_Type(s2, T_STRING);
  Check_Type(xcost, T_FIXNUM);

  size_t len1 = RSTRING_LEN(s1);
  size_t len2 = RSTRING_LEN(s2);

  /* StringValuePtr yields a char *; convert to the lev_byte pointer type
   * explicitly instead of relying on an implicit pointer conversion. */
  const lev_byte *str1 = (const lev_byte *)StringValuePtr(s1);
  const lev_byte *str2 = (const lev_byte *)StringValuePtr(s2);

  int cost = FIX2INT(xcost);

  long distance = lev_edit_distance(len1, str1, len2, str2, cost);

  /* distance is a long: box it with LONG2NUM so it is not narrowed to int. */
  return LONG2NUM(distance);
}

.similarity(string1, string2) ⇒ Float Also known as: ratio

Compute similarity of two strings.

The similarity is a Float between 0.0 and 1.0; identical strings (including two empty strings) yield 1.0.

Examples:

similarity('Hello World!', 'Holly grail!') # => 0.5833...
similarity('Holmes', 'Jack') # => 0.0

Parameters:

  • string1 (String)
  • string2 (String)

Returns:

  • (Float)


78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'ext/mini_levenshtein/mini_levenshtein.c', line 78

/*
 * MiniLevenshtein.similarity(string1, string2) -> Float
 *
 * Ruby binding that returns (lensum - distance) / lensum, where lensum is
 * the sum of both strings' lengths (RSTRING_LEN) and distance comes from
 * lev_edit_distance().
 *
 * Returns 1.0 when both strings are empty or when the distance is 0.
 * Both arguments must be Ruby Strings; Check_Type raises TypeError otherwise.
 *
 * The distance is requested with xcost = 1. The documented examples
 * (similarity('Holmes', 'Jack') => 0.0 and
 * similarity('Hello World!', 'Holly grail!') => 0.5833...) only hold when a
 * substitution counts as 2, which keeps the distance within [0, lensum].
 * NOTE(review): this relies on nonzero xcost meaning "substitution costs 2"
 * in lev_edit_distance(), which is not visible here -- confirm.
 */
VALUE rb_similarity(VALUE self, VALUE string1, VALUE string2)
{
  Check_Type(string1, T_STRING);
  Check_Type(string2, T_STRING);

  size_t len1 = RSTRING_LEN(string1);
  size_t len2 = RSTRING_LEN(string2);

  /* StringValuePtr yields a char *; convert to the lev_byte pointer type
   * explicitly instead of relying on an implicit pointer conversion. */
  const lev_byte *str1 = (const lev_byte *)StringValuePtr(string1);
  const lev_byte *str2 = (const lev_byte *)StringValuePtr(string2);

  /* Two empty strings are identical; also avoids dividing by zero below. */
  size_t lensum = len1 + len2;
  if (lensum == 0) {
    return DBL2NUM(1.0);
  }

  long distance = lev_edit_distance(len1, str1, len2, str2, 1);
  if (distance == 0) {
    return DBL2NUM(1.0);
  }

  /* Subtract in double so the signed distance is never mixed into
   * unsigned size_t arithmetic. */
  return DBL2NUM(((double)lensum - (double)distance) / (double)lensum);
}