Module: Emiler

Defined in:
lib/emiler.rb,
lib/emiler/version.rb

Constant Summary collapse

INEXACT_MATCH_COEFFICIENT =
ENV['INEXACT_MATCH_COEFFICIENT'] || 0.8
RAISE_ON_MALFORMED_EMAIL =
ENV['RAISE_ON_MALFORMED_EMAIL']
VERSION =
"0.2.2"

Class Method Summary collapse

Class Method Details

.similarity(e1, e2) ⇒ Object

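Compares two email addresses and returns a hash with the `jw`, `full`, `name` and `domain` scores plus a boolean `result`.

(Source comment: rubocop:disable Metrics/AbcSize)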



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/emiler.rb', line 31

# Scores how similar two email addresses are.
#
# Both arguments are stringified and downcased, then split on '@' into a
# local part and a domain. The local parts and the domains are scored
# separately and blended as `domain * 0.2 + name * 0.8`.
#
# Each part scores 1 on an exact match, INEXACT_MATCH_COEFFICIENT on a
# partial match (domains: same second-to-last dot-separated label; local
# parts: at least one shared run of letters), and otherwise half the
# coefficient multiplied by JW::MATCHER.distance of the two parts.
#
# @param e1 [#to_s] first email address
# @param e2 [#to_s] second email address
# @return [Hash, Object] a hash with keys :jw, :full, :name, :domain and
#   :result (true when :full >= 0.64); or JW::DUMMY when either input does
#   not split into exactly two parts on '@' and RAISE_ON_MALFORMED_EMAIL
#   is falsy
# @raise [MalformedEmailError] when either input does not split into exactly
#   two parts on '@' and RAISE_ON_MALFORMED_EMAIL is truthy
def similarity(e1, e2)
  e1, e2 = [e1, e2].map { |e| e.to_s.downcase }
  em1, em2 = [e1, e2].map { |e| e.split('@') }

  if em1.size != 2 || em2.size != 2
    raise MalformedEmailError.new(e1, e2) if RAISE_ON_MALFORMED_EMAIL
    return JW::DUMMY
  end

  # The constant is a String when it was taken from ENV; coerce it so the
  # arithmetic below works for both the ENV override and the numeric default.
  inexact = Float(INEXACT_MATCH_COEFFICIENT)

  local1, domain1 = em1
  local2, domain2 = em2

  jw = JW::MATCHER.distance(e1, e2)

  # Second-to-last label of each domain ("example" in "example.com"); nil for
  # a dotless domain. A missing or empty label must not count as a match,
  # otherwise any two dotless domains would compare as nil == nil.
  label1, label2 = [domain1, domain2].map { |d| d.split('.')[-2] }
  same_label = label1 && !label1.empty? && label1 == label2

  domain =
    if domain1 == domain2
      1 # exact domain match
    elsif same_label
      inexact
    else
      inexact / 2.0 * JW::MATCHER.distance(domain1, domain2)
    end

  # Runs of lowercase letters in each local part ("john", "doe" in "john.doe").
  words1, words2 = [local1, local2].map { |l| l.scan(/[a-z]+/) }

  name =
    if local1 == local2
      1 # exact match
    elsif !(words1 & words2).empty?
      inexact
    else
      inexact / 2.0 * JW::MATCHER.distance(local1, local2)
    end

  full = domain * 0.2 + name * 0.8

  # NOTE(review): 0.64 equals 0.8 * 0.8 — presumably the default coefficient
  # times the name weight; confirm before deriving it from the constant.
  { jw: jw, full: full, name: name, domain: domain, result: full >= 0.64 }
end