Class: Namor::Namor

Inherits:
Object
  • Object
show all
Defined in:
lib/namor/namor.rb

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Namor

Returns a new instance of Namor.



2
3
4
5
# File 'lib/namor/namor.rb', line 2

# Sets up a new Namor with the given configuration.
#
# @param opts [Hash] configuration handed straight to #config
def initialize(opts = {})
  config(opts)
  # Per-instance store for compiled suppression regexes; starts out empty.
  @re_cache = Hash.new
end

Instance Method Details

#assemble(firstname, middlename, lastname, de_maidened_last) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/namor/namor.rb', line 70

# Runs every name part through #final_cleaning and builds the two
# comma-joined "LAST,FIRST MIDDLE" strings.
#
# @param firstname [String, nil]
# @param middlename [String, nil]
# @param lastname [String, nil]
# @param de_maidened_last [String, nil] last name as produced by #demaiden
# @return [Array] [firstname, middlename, lastname, fullname, nee_fullname]
def assemble(firstname, middlename, lastname, de_maidened_last)
  firstname, middlename, lastname, de_maidened_last =
    [firstname, middlename, lastname, de_maidened_last].map { |part| final_cleaning(part) }

  # "FIRST MIDDLE", or nil when neither part survived cleaning.
  given_names = [firstname, middlename].compact.join(' ')
  given_names = given_names.empty? ? nil : given_names

  fullname, nee_fullname = [lastname, de_maidened_last].map do |surname|
    [surname, given_names].compact.join(',')
  end

  [firstname, middlename, lastname, fullname, nee_fullname]
end

#components(*args) ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/namor/namor.rb', line 138

# Breaks one or more name strings into a sorted, de-duplicated list of
# uppercase tokens.
#
# Parenthesised and bracketed text is dropped, apostrophes and stray
# brackets are removed, and commas, periods, underscores and hyphens are
# treated as token separators. Tokens that appear in the configured
# suppression list, or that match the built-in suffixes (MD, JR, SR, IV or
# a run of I's), are discarded.
#
# @param args [Array<String, nil>] name strings; nil entries are ignored
# @return [Array<String>] unique uppercase tokens in sorted order
def components(*args)
  # Tokens never contain periods (they are turned into separators below), so
  # strip a trailing '.' from suppression words the same way #suppression_re
  # does; otherwise an entry such as "Esq." could never match. nil entries
  # are skipped instead of raising.
  suppressed = (@config[:suppress] || []).compact.map { |word| word.chomp('.').upcase }

  # Build the suffix pattern once rather than once per token.
  suffix_alternation = %w{MD JR SR I+ IV}.join('|')
  suffix_re = /^(#{suffix_alternation})$/

  bits = args.compact.flat_map do |name|
    cleaned = name.gsub(/\([^\(]*\)/, '')
                  .gsub(/\[[^\[]*\]/, '')
                  .gsub(/[\(\)\[\]\']/, '')
                  .gsub(/[,._-]/, ' ')
    cleaned.split(/\s+/).map(&:upcase)
  end

  bits.reject { |bit| bit.empty? || suppressed.include?(bit) || bit =~ suffix_re }.uniq.sort
end

#config(opts) ⇒ Object



7
8
9
# File 'lib/namor/namor.rb', line 7

# Replaces the configuration hash. The :suppress key is the one read by
# #components and #suppression_re.
#
# @param opts [Hash, nil] new configuration; nil is treated as an empty hash
# @return [Hash] the configuration now in effect
def config(opts)
  # Regexes cached by #scrub / #suppress embed the previous
  # @config[:suppress] words, so drop them whenever the configuration
  # changes; otherwise the old words would keep being applied.
  @re_cache = {}
  @config = opts || {}
end

#demaiden(lastname, opts = {}) ⇒ Object



52
53
54
55
56
57
58
59
60
# File 'lib/namor/namor.rb', line 52

# Splits a (possibly compound) last name into its squashed full form and its
# final component.
#
# A hyphenated name is split on hyphens, otherwise on spaces; the last piece
# is returned as the second element. The first element is always uppercased
# with spaces removed. The second element keeps the case of the name it was
# split from, which is already uppercase when opts[:suppress] is given
# because #suppress upcases.
#
# @param lastname [String, nil]
# @param opts [Hash] :suppress => extra words to strip via #suppress first
# @return [Array] [squashed_lastname, final_component]; [nil, nil] for a nil
#   or empty lastname. final_component is nil when the split yields no pieces
#   (e.g. a name consisting only of "-" or of spaces).
def demaiden(lastname, opts = {})
  return [nil, nil] unless lastname && !lastname.empty?
  lastname = suppress(lastname, opts[:suppress]) if opts[:suppress]

  separator = lastname =~ /\-/ ? /\-/ : / /
  # String#split returns [] when the string consists solely of separators,
  # so the final piece may be nil; guard it instead of calling gsub on nil.
  final_piece = lastname.split(separator).last

  [lastname.upcase.gsub(/ /, ''), final_piece && final_piece.gsub(/ /, '')]
end

#extract(name, opts = {}) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/namor/namor.rb', line 84

# Parses a single free-form name string into its components.
#
# Two layouts are recognised after scrubbing:
# * "last, first[ middle...]" when the scrubbed name contains a comma
# * "first [middle-initial ]last" otherwise, where everything after the
#   first word (and optional one-letter initial) is the last name
#
# @param name [String, nil]
# @param opts [Hash] options forwarded to #scrub
# @return [Array] the #assemble result, or [] when name is nil
def extract(name, opts = {})
  return [] if name.nil?

  cleaned = scrub(name, opts)
  middlename = nil

  if cleaned.include?(',')
    # Only the first two comma-separated fields are used.
    surname, given = cleaned.split(/\s*,\s*/)
    lastname, de_maidened_last = demaiden(surname)
    firstname = given
    if given && given.include?(' ')
      firstname, *rest = given.split(/ +/)
      # Remaining given names are concatenated without a separator.
      middlename = rest.join unless rest.empty?
    end
  else
    firstname, *rest = cleaned.split(' ')
    # A one-letter word followed by more words is taken as a middle initial.
    middlename = rest.shift if rest.size > 1 && rest.first.length == 1
    lastname, de_maidened_last = demaiden(rest.join(' '))
  end

  assemble(firstname, middlename, lastname, de_maidened_last)
end

#extract_from_pieces(hash, opts = {}) ⇒ Object



123
124
125
126
127
128
129
130
# File 'lib/namor/namor.rb', line 123

# Builds the name components from already-separated fields.
#
# @param hash [Hash] name fields under the keys :first, :middle and :last
# @param opts [Hash] options forwarded to #scrub, #scrub_and_squash and
#   #demaiden
# @return [Array] the #assemble result
def extract_from_pieces(hash, opts = {})
  # Scrub the last name BEFORE picking its final component. Running
  # #demaiden on the raw value would select a trailing suffix or
  # parenthetical (e.g. the "Jr." of "Smith Jr.") as the surname, which then
  # scrubs away to an empty string. #extract scrubs first as well.
  scrubbed_last = scrub(hash[:last], opts)

  assemble(
    scrub(hash[:first], opts),
    scrub(hash[:middle], opts),
    scrub_and_squash(hash[:last], opts),
    scrub_and_squash(demaiden(scrubbed_last, opts).last, opts)
  )
end

#extract_from_pieces_with_cluster(hash, opts = {}) ⇒ Object



132
133
134
135
136
# File 'lib/namor/namor.rb', line 132

# Same as #extract_from_pieces, with two extra trailing elements: the
# fullname and nee_fullname (indexes 3 and 4) with every non-word character
# replaced by an underscore.
#
# @param hash [Hash] name fields, as accepted by #extract_from_pieces
# @param opts [Hash] options forwarded to #extract_from_pieces
# @return [Array] the seven-element result
def extract_from_pieces_with_cluster(hash, opts = {})
  parts = extract_from_pieces(hash, opts)
  cluster_keys = parts.values_at(3, 4).map { |full| full.gsub(/\W/, '_') }
  parts.concat(cluster_keys)
end

#extract_with_cluster(name, opts = {}) ⇒ Object



116
117
118
119
120
121
# File 'lib/namor/namor.rb', line 116

# Same as #extract, with two extra trailing elements: the fullname and
# nee_fullname (indexes 3 and 4) with every non-word character replaced by
# an underscore.
#
# @param name [String, nil]
# @param opts [Hash] options forwarded to #extract
# @return [Array] the seven-element result, or [] when #extract yields
#   nothing
def extract_with_cluster(name, opts = {})
  parts = extract(name, opts)
  return [] if parts.empty?

  [3, 4].each { |idx| parts << parts[idx].gsub(/\W/, '_') }
  parts
end

#final_cleaning(name) ⇒ Object



62
63
64
65
66
67
68
# File 'lib/namor/namor.rb', line 62

# Removes every hyphen from a name component.
#
# @param name [String, nil]
# @return [String, nil] the name without hyphens, or nil when the input is
#   nil or nothing is left once the hyphens are removed
def final_cleaning(name)
  return nil if name.nil?

  cleaned = name.delete('-')
  # Check emptiness AFTER stripping: an all-hyphen name would otherwise come
  # back as "" and yield joined names such as ",JOHN" in #assemble.
  cleaned.empty? ? nil : cleaned
end

#fullscrub(name, opts = {}) ⇒ Object



42
43
44
# File 'lib/namor/namor.rb', line 42

# Scrubs a name and then applies #final_cleaning to the result.
#
# @param name [String, nil]
# @param opts [Hash] options forwarded to #scrub
# @return [String, nil] whatever #final_cleaning returns for the scrubbed name
def fullscrub(name, opts = {})
  scrubbed = scrub(name, opts)
  final_cleaning(scrubbed)
end

#scrub(name, opts = {}) ⇒ Object

clean up a single name component

  • output all converted to uppercase

  • strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes)

  • remove any words that are in the user-provided suppression list

  • remove words from list of common suffixes (Jr, Sr etc)

  • remove anything inside parentheses or square brackets

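  • remove punctuation: periods become spaces; underscores, quotes and ampersands are dropped; a trailing comma is removed (hyphens and inner commas are kept)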

  • squeeze whitespace & trim spaces from ends



36
37
38
39
40
# File 'lib/namor/namor.rb', line 36

# Cleans up a single name component. The input is uppercased and then these
# substitutions are applied in order:
#
# 1. strip a leading run of two or more Z/X characters
# 2. remove words matched by the suppression regex for opts[:suppress]
# 3. remove the suffixes JR, SR, II, III and IV
# 4. remove parenthesised text, then square-bracketed text
# 5. turn periods into spaces; drop underscores, quotes and ampersands
# 6. remove a trailing comma
# 7. collapse runs of spaces, then trim both ends
#
# @param name [String, nil]
# @param opts [Hash] :suppress => extra words to remove
# @return [String, nil] the cleaned name; a nil name is returned unchanged
def scrub(name, opts = {})
  supp_words = opts[:suppress]
  # The regex is compiled (and cached) even when name turns out to be nil.
  @re_cache[supp_words] ||= suppression_re(supp_words)
  return name unless name

  substitutions = [
    [/^[ZX]{2,}/, ''],
    [@re_cache[supp_words], ''],
    [/\b(JR|SR|II|III|IV)\b/i, ''],
    [/\([^\)]*\)/, ''],
    [/\[[^\]]*\]/, ''],
    [/\./, ' '],
    [/[_'"\&]/, ''],
    [/,\s*$/, ''],
    [/ +/, ' ']
  ]

  scrubbed = substitutions.inject(name.upcase) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
  scrubbed.strip
end

#scrub_and_squash(name, opts = {}) ⇒ Object

scrub as above, but as a final stage, convert the result to a single term (no spaces or hyphens between bits)



47
48
49
50
# File 'lib/namor/namor.rb', line 47

# Scrubs a name and then squashes it into a single term by deleting every
# space and hyphen.
#
# @param name [String, nil]
# @param opts [Hash] options forwarded to #scrub
# @return [String, nil] the squashed name; a nil/false result from #scrub is
#   passed through untouched
def scrub_and_squash(name, opts = {})
  scrubbed = scrub(name, opts)
  return scrubbed unless scrubbed

  scrubbed.gsub(/[- ]/, '')
end

#suppress(name, supplist) ⇒ Object



23
24
25
26
# File 'lib/namor/namor.rb', line 23

# Uppercases a name and removes every word matched by the suppression regex
# for supplist. The regex comes from #suppression_re and is cached per
# supplist.
#
# @param name [String, nil]
# @param supplist [Array<String>, nil] extra words to remove
# @return [String, nil] the uppercased name with matches removed; a nil name
#   is returned unchanged
def suppress(name, supplist)
  # Looked up (and compiled if missing) before the nil check on name.
  pattern = (@re_cache[supplist] ||= suppression_re(supplist))
  return name unless name

  name.upcase.gsub(pattern, '')
end

#suppression_re(supp_list) ⇒ Object



12
13
14
15
16
17
18
19
20
21
# File 'lib/namor/namor.rb', line 12

# Builds the regex that matches any suppressed word as a whole word.
#
# The words come from @config[:suppress] plus supp_list. Each word has one
# trailing period removed, is uppercased, and is escaped so that it matches
# literally: "M.D." must not match "MAD", and a word containing a regex
# metacharacter must not raise RegexpError.
#
# @param supp_list [Array<String>, nil] extra words for this call
# @return [Regexp]
def suppression_re(supp_list)
  words = (@config[:suppress] || []) + (supp_list || [])

  alternation = words.compact.map { |word| Regexp.escape(word.chomp('.').upcase) }.join('|')
  Regexp.new('\b(' + alternation + ')\b')
end