3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
# File 'lib/rubyfish/jaro_winkler.rb', line 3
# Computes the Jaro similarity of +a+ and +b+, optionally with the Winkler
# common-prefix adjustment.
#
# Both arguments are converted with +to_s+. The caller's objects are never
# modified, even when case-insensitive comparison is requested.
#
# @param a [#to_s] first value to compare
# @param b [#to_s] second value to compare
# @param opts [Hash] optional flags, all treated as truthy/falsy:
#   :ignore_case    - compare lower-cased copies of both strings
#   :winklerize     - boost the score for a shared, non-digit prefix of up
#                     to four characters (applied only when the base score
#                     exceeds 0.7)
#   :long_tolerance - with :winklerize, apply an extra adjustment for longer
#                     strings that share enough characters beyond the prefix
# @return [Numeric] Integer 1 when both strings are empty; Integer 0 when
#   exactly one is empty or no characters match; otherwise a Float score.
def _distance(a, b, opts = {})
  long_tolerance = opts[:long_tolerance]
  winklerize = opts[:winklerize]
  ignore_case = opts[:ignore_case]

  as = a.to_s
  bs = b.to_s
  if ignore_case
    # String#to_s returns the receiver itself, so an in-place downcase!
    # would alter the caller's strings (or raise on frozen ones).
    as = as.downcase
    bs = bs.downcase
  end

  as_length = as.size
  bs_length = bs.size
  return 1 if as_length == 0 && bs_length == 0
  return 0 if as_length == 0 || bs_length == 0

  max_len = [as_length, bs_length].max
  min_len = [as_length, bs_length].min

  # Characters only count as matching when their positions are at most
  # search_range apart.
  search_range = [(max_len / 2) - 1, 0].max

  as_flag = Array.new(as_length, false)
  bs_flag = Array.new(bs_length, false)
  common_chars = 0

  (0...as_length).each do |i|
    low_lim = [i - search_range, 0].max
    hi_lim = [i + search_range, bs_length - 1].min
    (low_lim..hi_lim).each do |j|
      next if bs_flag[j] || bs[j] != as[i]

      as_flag[i] = bs_flag[j] = true
      common_chars += 1
      break
    end
  end
  return 0 if common_chars == 0

  # Pair up the matched characters of each string in order; every pair that
  # differs is half of a transposition.
  matched_as = (0...as_length).select { |i| as_flag[i] }
  matched_bs = (0...bs_length).select { |j| bs_flag[j] }
  out_of_order = matched_as.zip(matched_bs).count { |i, j| as[i] != bs[j] }
  trans_count = out_of_order / 2

  one_third = 1.0 / 3
  weight = (one_third * common_chars / as_length +
            one_third * common_chars / bs_length +
            one_third * (common_chars - trans_count) / common_chars)

  if winklerize && weight > 0.7
    # Length of the shared prefix, capped at four characters and stopping at
    # the first ASCII digit (character codes 48..57).
    prefix_cap = [min_len, 4].min
    prefix = 0
    while prefix < prefix_cap && as[prefix] == bs[prefix] &&
          !as[prefix].ord.between?(48, 57)
      prefix += 1
    end
    weight += prefix * 0.1 * (1.0 - weight) if prefix > 0

    if long_tolerance && min_len > 4 && common_chars > prefix + 1 &&
       2 * common_chars >= min_len + prefix
      # The long-string adjustment is skipped when the first string starts
      # with an ASCII digit.
      unless as[0].ord.between?(48, 57)
        weight += (1.0 - weight) * (common_chars - prefix - 1) /
                  (as_length + bs_length - prefix * 2 + 2).to_f
      end
    end
  end

  weight
end
|