Method: WhatLanguage#process_text
- Defined in:
- lib/whatlanguage.rb
#process_text(text) ⇒ Object
Very inefficient method for now, but it still beats the non-Bloom alternatives. TODO: change to a better bit-comparison technique later.
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/whatlanguage.rb', line 59
# Tallies, for each entry of +languages+, how many whitespace-separated words
# of the lowercased +text+ its @@data entry reports via +includes?+.
#
# Scanning can stop before the end of the text: after every fourth word,
# once at least two languages have scored, the two highest tallies are
# compared and the loop ends as soon as the leader is convincingly ahead.
#
# @param text [Object] presumably a String; it is handed to +to_lowercase+,
#   whose result is split on whitespace -- confirm against callers
# @return [Hash] language => hit count; the hash defaults to 0, so languages
#   that never matched have no stored key
def process_text(text)
  tallies = Hash.new(0)

  to_lowercase(text).split.each_with_index do |word, index|
    languages.each do |language|
      next unless @@data[language].includes?(word)

      tallies[language] += 1
    end

    # Only look for a runaway winner on every fourth word, and only when
    # there is a runner-up to compare against.
    words_seen = index + 1
    next unless (words_seen % 4).zero? && tallies.size > 1

    runner_up, leader = tallies.values.sort.last(2)

    # Heuristic thresholds (may need tuning): the leader needs more than 4
    # hits and must either more than double the runner-up or be more than
    # 25 hits ahead of it.
    clear_margin = leader > runner_up * 2 || leader - runner_up > 25
    break if leader > 4 && clear_margin

    # A hard cap (break once more than 100 words were seen) is disabled.
  end

  tallies
end