Module: JaroWinkler
- Defined in:
  - lib/jaro_winkler.rb
  - lib/jaro_winkler/version.rb
  - lib/jaro_winkler/fallback.rb
  - ext/jaro_winkler/jaro_winkler.c
Constant Summary
- VERSION = "1.2.8"
Class Method Summary
- .c_distance ⇒ Object
- .distance ⇒ Object
- .fallback? ⇒ Boolean
- .jaro_distance(s1, s2) ⇒ Object
- .r_distance(s1, s2, options = {}) ⇒ Object
Class Method Details
.c_distance ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/jaro_winkler.rb', line 60
#
# Pure-Ruby Jaro-Winkler score: the result of jaro_distance, boosted for a
# shared prefix of up to four characters.
#
# @param s1 [String] first string
# @param s2 [String] second string
# @param options [Hash] overrides merged over the defaults
# @option options [Numeric] :weight (0.1) scaling factor applied per matching
#   prefix character; must not exceed 0.25
# @option options [Numeric] :threshold (0.7) the prefix boost is applied only
#   when the Jaro score is at least this value
# @option options [Boolean] :ignore_case (false) upcase both strings before
#   comparing
# @return [Numeric] the Jaro score, unchanged when it is below the threshold
# @raise [RuntimeError] if the weight is greater than 0.25
def r_distance s1, s2, options = {}
  options = {weight: 0.1, threshold: 0.7, ignore_case: false}.merge options
  weight, threshold, ignore_case = options[:weight], options[:threshold], options[:ignore_case]
  raise 'Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1' if weight > 0.25
  s1, s2 = s1.upcase, s2.upcase if ignore_case
  distance = jaro_distance(s1, s2)
  # Count the leading characters both strings share, looking at no more
  # than four and stopping at the first mismatch.
  prefix = 0
  max_length = [4, s1.length, s2.length].min
  s1[0, max_length].chars.each_with_index do |c1, i|
    c1 == s2[i] ? prefix += 1 : break
  end
  distance < threshold ? distance : distance + ((prefix * weight) * (1 - distance))
end
.distance ⇒ Object
59 60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/jaro_winkler.rb', line 59
#
# Pure-Ruby Jaro-Winkler score: the result of jaro_distance, boosted for a
# shared prefix of up to four characters.
#
# @param s1 [String] first string
# @param s2 [String] second string
# @param options [Hash] overrides merged over the defaults
# @option options [Numeric] :weight (0.1) scaling factor applied per matching
#   prefix character; must not exceed 0.25
# @option options [Numeric] :threshold (0.7) the prefix boost is applied only
#   when the Jaro score is at least this value
# @option options [Boolean] :ignore_case (false) upcase both strings before
#   comparing
# @return [Numeric] the Jaro score, unchanged when it is below the threshold
# @raise [RuntimeError] if the weight is greater than 0.25
def r_distance s1, s2, options = {}
  options = {weight: 0.1, threshold: 0.7, ignore_case: false}.merge options
  weight, threshold, ignore_case = options[:weight], options[:threshold], options[:ignore_case]
  raise 'Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1' if weight > 0.25
  s1, s2 = s1.upcase, s2.upcase if ignore_case
  distance = jaro_distance(s1, s2)
  # Count the leading characters both strings share, looking at no more
  # than four and stopping at the first mismatch.
  prefix = 0
  max_length = [4, s1.length, s2.length].min
  s1[0, max_length].chars.each_with_index do |c1, i|
    c1 == s2[i] ? prefix += 1 : break
  end
  distance < threshold ? distance : distance + ((prefix * weight) * (1 - distance))
end
.fallback? ⇒ Boolean
3 4 5 |
# File 'lib/jaro_winkler/fallback.rb', line 3
#
# Reports whether the interpreter's platform string is exactly 'java'.
#
# @return [Boolean] true when RUBY_PLATFORM equals 'java', false otherwise
def fallback?
  RUBY_PLATFORM == 'java'
end
.jaro_distance(s1, s2) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/jaro_winkler.rb', line 5
#
# Computes a Jaro-style score for two strings by scanning each character of
# the shorter string for an equal character inside a sliding window of the
# longer one.
#
# @param s1 [String] first string
# @param s2 [String] second string
# @return [Numeric] Integer 0 when no character matches, otherwise a Float
#   built from the match and transposition counts
def jaro_distance s1, s2
  length1, length2 = s1.length, s2.length
  # Guarantee the length order
  if s1.length > s2.length
    s1, s2 = s2, s1
    length1, length2 = length2, length1
  end
  window_size = (length2 / 2) - 1
  window_size = 0 if window_size < 0
  matches = 0.0
  transpositions = 0
  previous_index = -1
  max_index = length2 - 1
  s1.chars.each_with_index do |c1, i|
    # Clamp the search window to the bounds of the longer string.
    left = i - window_size
    right = i + window_size
    left = 0 if left < 0
    right = max_index if right > max_index
    matched = false
    found = false
    s2[left..right].chars.each_with_index do |c2, j|
      if c1 == c2
        matched = true
        s2_index = left + j
        # Accept only the first occurrence that lies beyond the previously
        # accepted position; this keeps accepted matches in order.
        if !found && s2_index > previous_index
          previous_index = s2_index
          found = true
        end
      end
    end
    if matched
      matches += 1
      # A character present in the window but with no in-order position
      # left is counted as a transposition.
      transpositions += 1 unless found
    end
  end
  # Don't divide transpositions by 2 since it's been counted directly by above code.
  matches == 0 ? 0 : (matches / length1 + matches / length2 + (matches - transpositions) / matches) / 3.0
end
.r_distance(s1, s2, options = {}) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/jaro_winkler.rb', line 44
#
# Pure-Ruby Jaro-Winkler score: the result of jaro_distance, boosted for a
# shared prefix of up to four characters.
#
# @param s1 [String] first string
# @param s2 [String] second string
# @param options [Hash] overrides merged over the defaults
# @option options [Numeric] :weight (0.1) scaling factor applied per matching
#   prefix character; must not exceed 0.25
# @option options [Numeric] :threshold (0.7) the prefix boost is applied only
#   when the Jaro score is at least this value
# @option options [Boolean] :ignore_case (false) upcase both strings before
#   comparing
# @return [Numeric] the Jaro score, unchanged when it is below the threshold
# @raise [RuntimeError] if the weight is greater than 0.25
def r_distance s1, s2, options = {}
  options = {weight: 0.1, threshold: 0.7, ignore_case: false}.merge options
  weight, threshold, ignore_case = options[:weight], options[:threshold], options[:ignore_case]
  raise 'Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1' if weight > 0.25
  s1, s2 = s1.upcase, s2.upcase if ignore_case
  distance = jaro_distance(s1, s2)
  # Count the leading characters both strings share, looking at no more
  # than four and stopping at the first mismatch.
  prefix = 0
  max_length = [4, s1.length, s2.length].min
  s1[0, max_length].chars.each_with_index do |c1, i|
    c1 == s2[i] ? prefix += 1 : break
  end
  distance < threshold ? distance : distance + ((prefix * weight) * (1 - distance))
end