Module: BlackStack::Strings::Comparing

Defined in:
lib/functions.rb

Overview


Fuzzy String Comparison Functions: How similar are 2 strings that are not exactly equal.


Class Method Summary collapse

Class Method Details

.levenshtein_distance(s, t) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/functions.rb', line 121

# Computes the case-insensitive Levenshtein (edit) distance between two
# strings: the minimum number of single-character deletions, insertions and
# substitutions needed to turn one into the other.
#
# The arguments are never modified; comparison is done on downcased copies,
# so frozen strings are accepted.
#
# @param s [String] first string
# @param t [String] second string
# @return [Integer] the edit distance (0 when the strings match ignoring case)
def self.levenshtein_distance(s, t)
  # Work on copies: the bang variants would mutate the caller's strings
  # (and raise FrozenError on frozen input).
  a = s.downcase
  b = t.downcase

  m = a.length
  n = b.length
  return m if n.zero?
  return n if m.zero?

  # d[i][j] holds the distance between the first i chars of a and the
  # first j chars of b.
  d = Array.new(m + 1) { Array.new(n + 1) }

  (0..m).each { |i| d[i][0] = i }
  (0..n).each { |j| d[0][j] = j }
  (1..n).each do |j|
    (1..m).each do |i|
      d[i][j] = if a[i - 1] == b[j - 1] # adjust index into string
                  d[i - 1][j - 1]       # no operation required
                else
                  [
                    d[i - 1][j] + 1,    # deletion
                    d[i][j - 1] + 1,    # insertion
                    d[i - 1][j - 1] + 1 # substitution
                  ].min
                end
    end
  end
  d[m][n]
end

.max_sardi_distance(s) ⇒ Object

retorna la cantidad de palabras con mas de 3 caracteres que se encuentran en el parametro s



149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/functions.rb', line 149

# Counts the words in +s+ that are longer than 3 characters.
#
# The text is downcased and hyphens are treated as word separators before
# words (runs of a-z delimited by word boundaries) are extracted. Repeated
# words are counted each time they appear.
#
# The argument is never modified, so frozen strings are accepted.
#
# @param s [String] text to analyse
# @return [Integer] number of words with more than 3 characters
def self.max_sardi_distance(s)
  # Normalise a copy instead of mutating the caller's string.
  normalized = s.downcase.tr('-', ' ')
  words = normalized.scan(/\b[a-z]+\b/)
  # Words of 3 characters or fewer are skipped to avoid trivial keywords
  # such as 'and'.
  words.count { |word| word.size > 3 }
end

.sardi_distance(s, t) ⇒ Object

retorna la cantidad de palabras con mas de 3 caracteres del parametro s que NO se encuentran en el parametro t



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/functions.rb', line 164

# Counts the words of +s+ longer than 3 characters that do NOT appear in +t+.
#
# Both texts are downcased and hyphens are treated as word separators before
# words (runs of a-z delimited by word boundaries) are extracted. A result of
# 0 means every qualifying word of +s+ was found in +t+; the largest possible
# result is the number of qualifying words in +s+. Repeated words in +s+ are
# counted each time they appear.
#
# Neither argument is modified, so frozen strings are accepted.
#
# @param s [String] text whose words are looked up
# @param t [String] text that is searched
# @return [Integer] number of qualifying words of +s+ missing from +t+
def self.sardi_distance(s, t)
  # Normalise copies instead of mutating the caller's strings.
  source_words = s.downcase.tr('-', ' ').scan(/\b[a-z]+\b/)
  target_words = t.downcase.tr('-', ' ').scan(/\b[a-z]+\b/)

  # Equivalent to (qualifying words in s) - (qualifying words of s found in t).
  # Words of 3 characters or fewer are skipped to avoid trivial keywords
  # such as 'and'.
  source_words.count { |word| word.size > 3 && !target_words.include?(word) }
end