14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
# File 'lib/runestone/corpus.rb', line 14
# Looks up corpus words that are within a small edit distance of the given
# words.
#
# Every distinct word whose +typo_tolerance+ is greater than zero is sent to
# the database together with its downcased form. Rows of +runestone_corpus+
# are joined with the +%+ operator (presumably pg_trgm similarity -- confirm
# against the schema) and kept when the corpus word differs from the
# downcased token and its +levenshtein+ distance is within the tolerance.
#
# @param words [Array<String>] tokens to find similar corpus words for
# @return [Hash{String => Array<String>}] maps each token (as passed in) to
#   the distinct corpus words found for it, with the characters
#   <tt>( ) : | ! & *</tt> removed; tokens without matches have no key
def self.similar_words(*words)
  lut = {}

  # A repeated token would become a repeated VALUES row and every match for
  # it would come back (and be recorded) more than once.
  candidates = words.uniq.each_with_object([]) do |word, acc|
    tolerance = typo_tolerance(word)
    acc << [word, tolerance] if tolerance > 0
  end
  return lut if candidates.empty?

  # Only touch the connection once there is something to query.
  connection = Runestone::Model.connection
  values = candidates.map do |word, tolerance|
    "#{connection.quote(word)}, #{connection.quote(word.downcase)}, #{tolerance}"
  end

  result = connection.execute(<<-SQL)
    WITH tokens (token, token_downcased, typo_tolerance) AS (VALUES (#{values.join('), (')}))
    SELECT token, word, levenshtein(runestone_corpus.word, tokens.token_downcased)
    FROM tokens
    JOIN runestone_corpus ON runestone_corpus.word % tokens.token_downcased
    WHERE
      runestone_corpus.word != tokens.token_downcased
      AND levenshtein(runestone_corpus.word, tokens.token_downcased) <= tokens.typo_tolerance
  SQL

  result.each_row do |token, word, _distance|
    similar = word.delete('():|!&*')

    # The SQL filter compares against the downcased token, so once the
    # special characters are gone the word must be re-checked against both
    # spellings of the token to avoid reporting a token as similar to itself.
    next if similar == token || similar == token.downcase

    # Different corpus words can collapse to the same stripped word.
    matches = (lut[token] ||= [])
    matches << similar unless matches.include?(similar)
  end

  lut
end
|