Class: Namor::Namor

Inherits:
Object
  • Object
show all
Defined in:
lib/namor/namor.rb

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Namor

Returns a new instance of Namor.



2
3
4
# File 'lib/namor/namor.rb', line 2

# Builds a new instance and hands the given options to #config.
#
# @param opts [Hash] configuration options, passed straight to #config
def initialize(opts = {})
  config(opts)
end

Instance Method Details

#components(*args) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/namor/namor.rb', line 94

# Breaks any number of name strings into a sorted, de-duplicated list of
# uppercase tokens.
#
# For each non-nil argument: text inside (...) or [...] is removed, leftover
# brackets and apostrophes are dropped, and commas, periods, underscores and
# hyphens become spaces before the string is split on whitespace. Tokens that
# appear in the configured :suppress list (compared in uppercase), that match
# MD, JR, SR, IV or a run of I's, or that are empty, are discarded.
#
# @param args [Array<String, nil>] name strings; nil entries are skipped
# @return [Array<String>] unique uppercase tokens in sorted order
def components(*args)
  suppressed = (@config[:suppress] || []).map(&:upcase)
  suffix_pattern = %w{MD JR SR I+ IV}.join('|')

  tokens = args.compact.flat_map do |raw|
    raw.gsub(/\([^\(]*\)/, '')
       .gsub(/\[[^\[]*\]/, '')
       .gsub(/[\(\)\[\]\']/, '')
       .gsub(/[,._-]/, ' ')
       .split(/\s+/)
       .map(&:upcase)
  end

  kept = tokens.reject do |token|
    token.empty? || suppressed.include?(token) || token =~ /^(#{suffix_pattern})$/
  end

  kept.uniq.sort
end

#config(opts) ⇒ Object



6
7
8
# File 'lib/namor/namor.rb', line 6

# Stores the configuration hash consulted by the name-cleaning methods
# (they read the :suppress key from it).
#
# A nil argument is stored as an empty hash so that later
# `@config[:suppress]` lookups do not raise NoMethodError.
#
# @param opts [Hash, nil] configuration options
# @return [Hash] the configuration that was stored
def config(opts)
  @config = opts || {}
end

#demaiden(lastname) ⇒ Object



29
30
31
32
33
34
35
36
# File 'lib/namor/namor.rb', line 29

# Splits a surname into its space-free form and its "de-maidened" form.
#
# The first element is the surname with all spaces removed. The second is the
# part after the last hyphen (spaces removed) when the surname is hyphenated,
# otherwise the last space-separated word.
#
# @param lastname [String, nil] raw surname
# @return [Array(String, String), Array(nil, nil)] [space-free surname,
#   de-maidened surname]; both nil for nil/empty input. The second element is
#   nil when the surname has no usable final segment (e.g. "-" or " ").
def demaiden(lastname)
  return [nil, nil] unless lastname && !lastname.empty?

  squashed = lastname.gsub(/ /, '')

  if lastname =~ /\-/
    # split drops trailing empty fields, so a hyphen-only surname yields no
    # segments at all; guard against calling gsub on nil in that case.
    final_segment = lastname.split(/\-/).last
    [squashed, final_segment && final_segment.gsub(/ /, '')]
  else
    [squashed, lastname.split(/ /).last]
  end
end

#extract(name, opts = {}) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/namor/namor.rb', line 46

# Parses a personal name into its parts.
#
# The name is first passed through #scrub. Two layouts are recognised:
#
# * "last, first[ middle...]" when the scrubbed name contains a comma; any
#   words after the first given name are concatenated (no separator) into the
#   middle name.
# * "first [middle-initial] last..." otherwise; the second word counts as a
#   middle initial only when it is one character long and more words follow.
#   Everything remaining is the surname.
#
# The surname goes through #demaiden and every part through #final_cleaning.
#
# @param name [String, nil] raw name
# @param opts [Hash] options forwarded to #scrub
# @return [Array] [] when name is nil, otherwise
#   [first, middle, last, "last,first middle", "de-maidened-last,first middle"]
def extract(name, opts = {})
  return [] if name.nil?

  cleaned = scrub(name, opts)

  if cleaned =~ /,/
    # "last, first[ middle]"
    raw_last, given = cleaned.split(/\s*,\s*/)
    last, nee_last = demaiden(raw_last)
    first = given
    middle = nil
    if given && given =~ / /
      first, *others = given.split(/ +/)
      middle = others.join unless others.empty?
    end
  else
    # "first [middle-initial ]last" or "first everything-else-is-the-lastname"
    first, *others = cleaned.split(' ')
    # a lone character followed by at least one more word is taken as a middle initial
    middle = others.shift if others.count > 1 && others.first.length == 1
    last, nee_last = demaiden(others.join(' '))
  end

  first, middle, last, nee_last =
    [first, middle, last, nee_last].map { |part| final_cleaning(part) }

  given_names = [first, middle].compact.join(' ')

  [
    first,
    middle,
    last,
    [last, given_names].compact.join(','),
    [nee_last, given_names].compact.join(',')
  ]
end

#extract_with_cluster(name, opts = {}) ⇒ Object



87
88
89
90
91
# File 'lib/namor/namor.rb', line 87

# Same as #extract, with one extra trailing element: the last element of the
# #extract result with every non-word character replaced by an underscore.
#
# @param name [String, nil] raw name
# @param opts [Hash] options forwarded to #extract
# @return [Array] [] when #extract returns nothing, otherwise the #extract
#   result plus the underscore-joined cluster key
def extract_with_cluster(name, opts = {})
  parts = extract(name, opts)
  return [] if parts.empty?

  cluster_key = parts.last.gsub(/\W/, '_')
  parts.push(cluster_key)
end

#final_cleaning(name) ⇒ Object



38
39
40
41
42
43
44
# File 'lib/namor/namor.rb', line 38

# Removes hyphens from a name part and normalises blank results to nil.
#
# A name consisting only of hyphens is empty once they are removed; it is
# returned as nil (not "") so that callers using `compact` drop it.
#
# @param name [String, nil] a single name part
# @return [String, nil] the name without hyphens, or nil when nothing is left
def final_cleaning(name)
  return nil unless name && !name.empty?

  cleaned = name.gsub(/\-/, '')
  cleaned.empty? ? nil : cleaned
end

#fullscrub(name, opts = {}) ⇒ Object



25
26
27
# File 'lib/namor/namor.rb', line 25

# Runs a name through #scrub and then through #final_cleaning.
#
# @param name [String, nil] raw name
# @param opts [Hash] options forwarded to #scrub
# @return [Object] whatever #final_cleaning returns for the scrubbed name
def fullscrub(name, opts = {})
  scrubbed = scrub(name, opts)
  final_cleaning(scrubbed)
end

#scrub(name, opts = {}) ⇒ Object

clean up a single name component

  • output all converted to uppercase

  • strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes)

  • remove any words that are in the user-provided suppression list

  • remove words from list of common suffixes (Jr, Sr etc)

  • remove anything inside parentheses

  • remove punctuation

  • squeeze whitespace & trim spaces from ends



18
19
20
21
22
23
# File 'lib/namor/namor.rb', line 18

# Cleans up a single name string.
#
# Steps, in order: uppercase; strip a leading run of two or more Z/X
# characters; remove suppressed words; remove the suffixes JR, SR, II, III,
# IV; remove parenthesised text; turn periods into spaces; drop underscores,
# apostrophes and ampersands; drop a trailing comma; squeeze runs of spaces
# and trim the ends.
#
# Suppressed words come from the configured :suppress list plus
# opts[:suppress]. They are matched literally (regex-escaped) and only as
# whole words: preceded by whitespace or start of line, and followed by
# whitespace, a period or end of line. Each match is replaced by a single
# space so the words on either side stay separate; the later squeeze/strip
# removes the surplus. When there are no suppressed words the step is skipped
# entirely.
#
# @param name [String, nil] raw name
# @param opts [Hash] per-call options; :suppress is an extra list of words
# @return [String, nil] the cleaned name, or name itself when it is nil
def scrub(name, opts = {})
  return name unless name

  words = ((@config[:suppress] || []) + (opts[:suppress] || []))
          .compact
          .map { |word| Regexp.escape(word.to_s.upcase) }
          .reject(&:empty?)

  result = name.upcase.gsub(/^[ZX]{2,}/, '')

  unless words.empty?
    # Group the alternation so the boundary conditions apply to every word,
    # and use a lookahead so the trailing delimiter is still available as the
    # leading delimiter of an immediately following suppressed word.
    suppression_re = Regexp.new('(?:\s|^)(?:' + words.join('|') + ')(?=\s|\.|$)')
    result = result.gsub(suppression_re, ' ')
  end

  result.gsub(/\b(JR|SR|II|III|IV)\b/i, '')
        .gsub(/\([^\(]*\)/, '')
        .gsub(/\./, ' ')
        .gsub(/[_'\&]/, '')
        .gsub(/,\s*$/, '')
        .gsub(/ +/, ' ')
        .strip
end