Module: Emiler
- Defined in:
- lib/emiler.rb,
lib/emiler/version.rb
Constant Summary collapse
- INEXACT_MATCH_COEFFICIENT =
ENV['INEXACT_MATCH_COEFFICIENT'] || 0.8
- RAISE_ON_MALFORMED_EMAIL =
ENV['RAISE_ON_MALFORMED_EMAIL']
- VERSION =
"0.2.2"
Class Method Summary collapse
-
.similarity(e1, e2) ⇒ Object
rubocop:disable Metrics/AbcSize.
Class Method Details
.similarity(e1, e2) ⇒ Object
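Scores how similar two email addresses are by combining a local-part score (weight 0.8) and a domain score (weight 0.2); the pair counts as a match when the combined score is at least 0.64. (The source comment here is only the linter directive `rubocop:disable Metrics/AbcSize`.)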
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/emiler.rb', line 31

# Scores how similar two email addresses are.
#
# Both arguments are stringified and downcased, then split on '@'. The
# local part and the domain are scored separately and combined as
# `domain * 0.2 + name * 0.8`.
#
# Each part is scored as:
# * 1 when the two parts are identical;
# * INEXACT_MATCH_COEFFICIENT when they are "related" (domains share the
#   label just before the last dot; local parts share at least one run of
#   lowercase letters);
# * otherwise half the coefficient times JW::MATCHER.distance of the parts.
#
# NOTE(review): JW::MATCHER is defined elsewhere; presumably a Jaro-Winkler
# matcher returning a score in 0..1 - confirm.
#
# @param e1 [#to_s] first email address
# @param e2 [#to_s] second email address
# @return [Hash] `:jw` (matcher score of the whole addresses), `:full`
#   (weighted score), `:name`, `:domain`, and `:result` (`full >= 0.64`);
#   or JW::DUMMY when either address does not split into exactly two parts
#   and RAISE_ON_MALFORMED_EMAIL is falsy
# @raise [MalformedEmailError] when either address does not split into
#   exactly two parts on '@' and RAISE_ON_MALFORMED_EMAIL is truthy
# @raise [ArgumentError, TypeError] when INEXACT_MATCH_COEFFICIENT cannot be
#   converted to a Float
def similarity(e1, e2)
  e1, e2 = [e1, e2].map { |email| email.to_s.downcase }
  em1, em2 = [e1, e2].map { |email| email.split('@') }

  if em1.size != 2 || em2.size != 2
    raise MalformedEmailError.new(e1, e2) if RAISE_ON_MALFORMED_EMAIL

    return JW::DUMMY
  end

  # The constant comes straight from ENV when the variable is set, i.e. it
  # may be a String; coerce it so the arithmetic below is always numeric.
  coefficient = Float(INEXACT_MATCH_COEFFICIENT)

  name1, domain1 = em1
  name2, domain2 = em2

  jw = JW::MATCHER.distance(e1, e2)

  # Label immediately before the last dot ("example" in "mail.example.com").
  # It is nil for dot-less domains, so it must be present before comparing;
  # otherwise two unrelated single-label domains would match as nil == nil.
  label1, label2 = [domain1, domain2].map { |domain| domain.split('.')[-2] }
  domain =
    if domain1 == domain2
      1 # exact domain match
    elsif label1 && !label1.empty? && label1 == label2
      coefficient
    else
      coefficient / 2.0 * JW::MATCHER.distance(domain1, domain2)
    end

  # Runs of lowercase letters in each local part ("john.doe" => john, doe).
  tokens1, tokens2 = [name1, name2].map { |local| local.scan(/[a-z]+/) }
  name =
    if name1 == name2
      1 # exact match
    elsif !(tokens1 & tokens2).empty?
      coefficient
    else
      coefficient / 2.0 * JW::MATCHER.distance(name1, name2)
    end

  full = domain * 0.2 + name * 0.8

  { jw: jw, full: full, name: name, domain: domain, result: full >= 0.64 }
end