Class: Dat::Pure::Logic

Inherits:
Object
  • Object
show all
Defined in:
lib/dat/logic.rb

Constant Summary collapse

MIN_SIZE =
3
WEIGHT_THRESHOLD =
0.7
NUM_CHARS =
4

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dict, opt = {}) ⇒ Logic

Returns a new instance of Logic.



12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/dat/logic.rb', line 12

# Sets up a Logic instance around +dict+.
#
# dict - stored as-is in @dict; not otherwise touched here.
# opt  - Hash of optional settings:
#        :add, :replace, :delete - each defaults to true
#        :transpose              - defaults to false
#        :min_size               - defaults to MIN_SIZE
#
# NOTE(review): the four flags presumably switch individual edit
# operations on or off in the perturbation code, which is not visible
# from this method — confirm against perturb_impl.
def initialize(dict, opt={})
  @dict = dict
  @min_size = opt.fetch(:min_size, MIN_SIZE)

  # The three flags that default to true are read in one pass.
  @add, @replace, @delete = [:add, :replace, :delete].map { |op| opt.fetch(op, true) }
  @transpose = opt.fetch(:transpose, false)

  # Truthy only for the default configuration: add, replace and delete
  # all enabled, and transpose disabled.
  @cachable = @add && @replace && @delete && !@transpose
  @perturb_cache = {}
end

Instance Attribute Details

#min_size ⇒ Object (readonly)

Returns the value of attribute min_size.



10
11
12
# File 'lib/dat/logic.rb', line 10

# Read-only accessor: returns the current value of the @min_size
# instance variable (nil if it has never been assigned).
def min_size
  @min_size
end

Instance Method Details

#damlev(s, t) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/dat/logic.rb', line 74

# Damerau-Levenshtein distance between the strings +s+ and +t+: the
# minimum number of single-character insertions, deletions,
# substitutions and transpositions of adjacent characters needed to
# turn one into the other.
#
# s, t - Strings to compare.
#
# Returns an Integer distance (0 when the strings are equal).
def damlev(s, t)
  rows, cols = s.size, t.size
  return cols if rows == 0
  return rows if cols == 0

  # Larger than any real distance; fills the sentinel border so that a
  # transposition lookup falling outside the real table never wins.
  ceiling = rows + cols
  table = Array.new(rows + 2) { Array.new(cols + 2) }

  # The table is shifted by one in both directions: table[i + 1][j + 1]
  # holds the distance between the first i chars of s and the first j
  # chars of t; row 0 and column 0 are the sentinel border.
  table[0][0] = ceiling
  0.upto(rows) do |i|
    table[i + 1][0] = ceiling
    table[i + 1][1] = i
  end
  0.upto(cols) do |j|
    table[0][j + 1] = ceiling
    table[1][j + 1] = j
  end

  # For every character: the last (1-based) row of s where it was seen.
  last_row = {}
  (s + t).each_char { |ch| last_row[ch] = 0 }

  1.upto(rows) do |i|
    # Last (1-based) column in this row where s[i-1] matched t.
    last_col = 0
    1.upto(cols) do |j|
      prev_i = last_row[t[j - 1]]
      prev_j = last_col
      same = s[i - 1] == t[j - 1]
      last_col = j if same

      diagonal = table[i][j] + (same ? 0 : 1)
      from_left = table[i + 1][j] + 1
      from_above = table[i][j + 1] + 1
      # Swap the two matching characters and pay for everything that
      # sits between them on either side.
      transposition = table[prev_i][prev_j] + (i - prev_i - 1) + 1 + (j - prev_j - 1)

      table[i + 1][j + 1] = [diagonal, from_left, from_above, transposition].min
    end
    last_row[s[i - 1]] = i
  end

  table[rows + 1][cols + 1]
end

#jaro_winkler(s, t) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/dat/logic.rb', line 31

# Jaro-Winkler similarity of +s+ and +t+.
#
# s, t - Strings to compare.
#
# Returns a Float: 1.0 when both strings are empty, 0.0 when exactly one
# is empty or no characters match, otherwise the Jaro weight, boosted
# for a shared prefix when the weight exceeds WEIGHT_THRESHOLD.
def jaro_winkler(s, t)
  s_len, t_len = s.size, t.size
  if s_len == 0
    return t_len == 0 ? 1.0 : 0.0
  end

  # How far apart two equal characters may sit and still count as a match.
  window = [([s_len, t_len].max / 2) - 1, 0].max

  s_flags = Array.new(s_len, false)
  t_flags = Array.new(t_len, false)

  # Pair each character of s with the first unused equal character of t
  # inside the window.
  matches = 0
  s_len.times do |i|
    lo = [i - window, 0].max
    hi = [i + window + 1, t_len].min
    hit = (lo...hi).find { |j| !t_flags[j] && s[i] == t[j] }
    next if hit.nil?

    s_flags[i] = true
    t_flags[hit] = true
    matches += 1
  end
  return 0.0 if matches == 0

  # Walk the matched characters of both strings in order; every position
  # where they disagree is half a transposition (integer division).
  s_hits = (0...s_len).select { |i| s_flags[i] }.map { |i| s[i] }
  t_hits = (0...t_len).select { |j| t_flags[j] }.map { |j| t[j] }
  transposed = s_hits.zip(t_hits).count { |a, b| a != b } / 2

  weight = ((matches.to_f / s_len) + (matches.to_f / t_len) +
            ((matches - transposed) / matches.to_f)) / 3.0
  return weight if weight <= WEIGHT_THRESHOLD

  # Winkler adjustment: reward up to NUM_CHARS leading characters shared
  # by both strings.
  limit = [NUM_CHARS, s_len, t_len].min
  prefix = 0
  prefix += 1 while prefix < limit && s[prefix] == t[prefix]
  return weight if prefix == 0

  weight + 0.1 * prefix * (1.0 - weight)
end

#leven(s, t) ⇒ Object



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/dat/logic.rb', line 104

# Levenshtein distance between +s+ and +t+: the minimum number of
# single-character insertions, deletions and substitutions needed to
# turn one into the other.
#
# s, t - Strings to compare.
#
# Returns an Integer distance (0 when the strings are equal).
def leven(s, t)
  m, n = s.size, t.size
  # Against an empty string the distance is just the other string's length.
  return n if m == 0
  return m if n == 0

  # d[i][j] is the distance between the first i chars of s and the
  # first j chars of t.
  d = Array.new(m + 1) { Array.new(n + 1, 0) }

  # The border is filled once per cell; the two loops are independent.
  (0..m).each { |i| d[i][0] = i } # any prefix of s against the empty string
  (0..n).each { |j| d[0][j] = j } # the empty string against any prefix of t

  (1..n).each do |j|
    (1..m).each do |i|
      d[i][j] =
        if s[i - 1] == t[j - 1]
          d[i - 1][j - 1]
        else
          #  delete        insert        replace
          [d[i - 1][j], d[i][j - 1], d[i - 1][j - 1]].min + 1
        end
    end
  end

  d[m][n]
end

#perturb(wordstr, used = {}) ⇒ Object



26
27
28
# File 'lib/dat/logic.rb', line 26

# Returns the results of perturb_impl(wordstr), leaving out every entry
# whose +get+ value has a truthy entry in +used+.
#
# wordstr - passed straight through to perturb_impl.
# used    - lookup (Hash by default) keyed by the value each result
#           returns from +get+; truthy values mark results to drop.
def perturb(wordstr, used={})
  candidates = perturb_impl(wordstr)
  candidates.reject do |candidate|
    used[candidate.get]
  end
end