Class: Namor::Namor
- Inherits: Object
- Inheritance hierarchy: Object → Namor::Namor
- Defined in:
- lib/namor/namor.rb
Instance Method Summary
- #assemble(firstname, middlename, lastname, de_maidened_last) ⇒ Object
- #components(*args) ⇒ Object
- #config(opts) ⇒ Object
- #demaiden(lastname, opts = {}) ⇒ Object
- #extract(name, opts = {}) ⇒ Object
- #extract_from_pieces(hash, opts = {}) ⇒ Object
- #extract_from_pieces_with_cluster(hash, opts = {}) ⇒ Object
- #extract_with_cluster(name, opts = {}) ⇒ Object
- #final_cleaning(name) ⇒ Object
- #fullscrub(name, opts = {}) ⇒ Object
-
#initialize(opts = {}) ⇒ Namor
constructor
A new instance of Namor.
-
#scrub(name, opts = {}) ⇒ Object
Clean up a single name component:
* convert the output to uppercase
* strip a leading ZZ+ or XX+ (frequently used as invalid-account prefixes)
* remove any words that are in the user-provided suppression list
* remove words from the list of common suffixes (Jr, Sr, etc.)
* remove anything inside parentheses
* remove punctuation
* squeeze whitespace and trim spaces from the ends
-
#scrub_and_squash(name, opts = {}) ⇒ Object
scrub as above, but as a final stage, convert the result to a single term (no spaces or hyphens between bits).
- #suppress(name, supplist) ⇒ Object
- #suppression_re(supp_list) ⇒ Object
Constructor Details
#initialize(opts = {}) ⇒ Namor
Returns a new instance of Namor.
2 3 4 5 |
# File 'lib/namor/namor.rb', line 2
# Builds a new Namor instance.
#
# @param opts [Hash] configuration handed straight to #config; the methods in
#   this class read its :suppress entry (a list of words to strip from names).
def initialize(opts = {})
  # Compiled suppression regexps, keyed by the per-call suppression list
  # given to #scrub / #suppress.
  @re_cache = {}
  config(opts)
end
Instance Method Details
#assemble(firstname, middlename, lastname, de_maidened_last) ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/namor/namor.rb', line 70
# Runs every name part through #final_cleaning and builds the two
# "LAST,FIRST MIDDLE" style full names.
#
# @param firstname [String, nil]
# @param middlename [String, nil]
# @param lastname [String, nil]
# @param de_maidened_last [String, nil] last name as produced by #demaiden
# @return [Array] [firstname, middlename, lastname, fullname, nee_fullname];
#   the first three may be nil, the two full names are always strings
def assemble(firstname, middlename, lastname, de_maidened_last)
  first, middle, last, nee_last =
    [firstname, middlename, lastname, de_maidened_last].map { |part| final_cleaning(part) }

  # "FIRST MIDDLE", or nil when neither part is present so that it drops
  # out of the comma-joined full names below.
  given = [first, middle].compact.join(' ')
  given = nil if given.empty?

  with_given = lambda { |surname| [surname, given].compact.join(',') }

  [first, middle, last, with_given.call(last), with_given.call(nee_last)]
end
#components(*args) ⇒ Object
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/namor/namor.rb', line 138
# Breaks any number of name strings into a sorted, de-duplicated list of
# uppercase words.
#
# For each non-nil argument: text inside (...) or [...] is dropped, leftover
# parentheses, brackets and apostrophes are removed, and commas, periods,
# underscores and hyphens become word separators. Words found in the
# configured :suppress list, and the suffixes MD, JR, SR, IV and runs of "I",
# are discarded.
#
# @param args [Array<String, nil>] name strings; nil entries are ignored
# @return [Array<String>] unique uppercase words in sorted order
def components(*args)
  suppression_list = @config[:suppress] ? @config[:suppress].map(&:upcase) : []
  suffix_re = /^(MD|JR|SR|I+|IV)$/

  bits = args.compact.flat_map do |name|
    # Only strip innermost, properly closed groups: excluding both delimiters
    # keeps a stray ")" or "]" later in the string from swallowing the real
    # name text between the group and that stray character.
    name.gsub(/\([^()]*\)/, '')
        .gsub(/\[[^\[\]]*\]/, '')
        .gsub(/[\(\)\[\]\']/, '')
        .gsub(/[,._-]/, ' ')
        .split(/\s+/)
        .map(&:upcase)
  end

  bits.reject! { |bit| bit.empty? || suppression_list.include?(bit) || bit =~ suffix_re }
  bits.uniq.sort
end
#config(opts) ⇒ Object
7 8 9 |
# File 'lib/namor/namor.rb', line 7
# Replaces the instance configuration.
#
# The regexps cached by #scrub and #suppress are built from the configured
# :suppress list, so the cache is discarded here; otherwise patterns compiled
# under the previous configuration would keep being used.
#
# @param opts [Hash] new configuration (the :suppress key is read elsewhere
#   in this class)
# @return [Hash] the configuration that was stored
def config(opts)
  @re_cache = {}
  @config = opts
end
#demaiden(lastname, opts = {}) ⇒ Object
52 53 54 55 56 57 58 59 60 |
# File 'lib/namor/namor.rb', line 52
# Splits a last name into its full form and its final segment.
#
# The first element is the whole last name, uppercased with spaces removed.
# The second is the part after the last hyphen (spaces removed) when the name
# contains a hyphen, otherwise the last space-separated word. The second
# element keeps whatever letter case the input had at that point.
#
# @param lastname [String, nil]
# @param opts [Hash] when opts[:suppress] is set, the name is first passed
#   through #suppress with that list
# @return [Array] [full_last, final_segment]; [nil, nil] for nil or empty
#   input, and final_segment is nil when the name has no usable segment
#   (for example a lone "-")
def demaiden(lastname, opts = {})
  return [nil, nil] unless lastname && !lastname.empty?

  lastname = suppress(lastname, opts[:suppress]) if opts[:suppress]
  squashed = lastname.upcase.gsub(/ /, '')

  if lastname =~ /\-/
    # split drops trailing empty fields, so a name made only of hyphens
    # yields no segments at all; guard against calling gsub on nil.
    final_segment = lastname.split(/\-/).last
    [squashed, final_segment && final_segment.gsub(/ /, '')]
  else
    [squashed, lastname.split(/ /).last]
  end
end
#extract(name, opts = {}) ⇒ Object
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/namor/namor.rb', line 84
# Parses a single free-form name string into its parts.
#
# Two layouts are recognised after the name has been passed through #scrub:
# * "last, first[ middle...]" when the scrubbed name contains a comma
# * "first [middle-initial ]last..." otherwise, where a one-character second
#   word is taken to be a middle initial and everything else after the first
#   word is the last name
#
# @param name [String, nil]
# @param opts [Hash] options forwarded to #scrub
# @return [Array] [] when name is nil, otherwise the result of #assemble
def extract(name, opts = {})
  return [] if name.nil?

  cleaned = scrub(name, opts)

  if cleaned.include?(',')
    # "last, first[ middle]"
    surname, given = cleaned.split(/\s*,\s*/)
    surname, nee_surname = demaiden(surname)

    # Everything after the first given word becomes one squashed middle name.
    given, *middle_parts = given.split(/ +/) if given && given.include?(' ')
    middle = middle_parts && middle_parts.any? ? middle_parts.join : nil
  else
    # "first [middle-initial ]last" or "first everything-else-is-the-lastname"
    given, *rest = cleaned.split(' ')

    # A single-character word followed by more words is assumed to be a
    # middle initial; otherwise middle stays nil.
    middle = rest.shift if rest.count > 1 && rest.first.length == 1

    surname, nee_surname = demaiden(rest.join(' '))
  end

  assemble(given, middle, surname, nee_surname)
end
#extract_from_pieces(hash, opts = {}) ⇒ Object
123 124 125 126 127 128 129 130 |
# File 'lib/namor/namor.rb', line 123
# Builds the assembled name array from already-separated name parts.
#
# @param hash [Hash] name parts under the keys :first, :middle and :last
# @param opts [Hash] options forwarded to #scrub, #scrub_and_squash and
#   #demaiden
# @return [Array] the result of #assemble
def extract_from_pieces(hash, opts = {})
  first  = scrub(hash[:first], opts)
  middle = scrub(hash[:middle], opts)
  last   = scrub_and_squash(hash[:last], opts)

  # Second element of the #demaiden pair is the final segment of the last name.
  maiden_split = demaiden(hash[:last], opts)
  nee_last = scrub_and_squash(maiden_split && maiden_split.last, opts)

  assemble(first, middle, last, nee_last)
end
#extract_from_pieces_with_cluster(hash, opts = {}) ⇒ Object
132 133 134 135 136 |
# File 'lib/namor/namor.rb', line 132
# Same as #extract_from_pieces, with two extra trailing elements: the full
# name and the nee full name with every non-word character replaced by "_".
#
# @param hash [Hash] name parts, see #extract_from_pieces
# @param opts [Hash] options forwarded to #extract_from_pieces
# @return [Array] the #extract_from_pieces array plus the two cluster keys
def extract_from_pieces_with_cluster(hash, opts = {})
  parts = extract_from_pieces(hash, opts)

  fullname_cluster = parts[3].gsub(/\W/, '_')
  nee_cluster = parts[4].gsub(/\W/, '_')

  parts.push(fullname_cluster, nee_cluster)
end
#extract_with_cluster(name, opts = {}) ⇒ Object
116 117 118 119 120 121 |
# File 'lib/namor/namor.rb', line 116
# Same as #extract, with two extra trailing elements: the full name and the
# nee full name with every non-word character replaced by "_".
#
# @param name [String, nil]
# @param opts [Hash] options forwarded to #extract
# @return [Array] [] when #extract yields nothing, otherwise the #extract
#   array plus the two cluster keys
def extract_with_cluster(name, opts = {})
  parts = extract(name, opts)
  return [] if parts.empty?

  # Indexes 3 and 4 are the full name and nee full name built by #assemble.
  clusters = parts.values_at(3, 4).map { |full| full.gsub(/\W/, '_') }
  parts.concat(clusters)
end
#final_cleaning(name) ⇒ Object
62 63 64 65 66 67 68 |
# File 'lib/namor/namor.rb', line 62
# Last cleaning step for a name part: removes hyphens and normalises blank
# results to nil.
#
# @param name [String, nil]
# @return [String, nil] the name without hyphens, or nil when the input is
#   nil or empty, or when nothing is left once the hyphens are removed
def final_cleaning(name)
  return nil unless name && !name.empty?

  cleaned = name.gsub(/\-/, '')
  # A name made only of hyphens must count as missing, not as an empty
  # string that would later be joined into a full name.
  cleaned.empty? ? nil : cleaned
end
#fullscrub(name, opts = {}) ⇒ Object
42 43 44 |
# File 'lib/namor/namor.rb', line 42
# Scrubs a name and then applies the final cleaning step.
#
# @param name [String, nil]
# @param opts [Hash] options forwarded to #scrub
# @return [String, nil] the result of #final_cleaning on the scrubbed name
def fullscrub(name, opts = {})
  scrubbed = scrub(name, opts)
  final_cleaning(scrubbed)
end
#scrub(name, opts = {}) ⇒ Object
clean up a single name component
-
output all converted to uppercase
-
strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes)
-
remove any words that are in the user-provided suppression list
-
remove words from list of common suffixes (Jr, Sr etc)
-
remove anything inside parentheses
-
remove punctuation
-
squeeze whitespace & trim spaces from ends
36 37 38 39 40 |
# File 'lib/namor/namor.rb', line 36
# Cleans up a single name component.
#
# The name is uppercased and then run through an ordered list of
# substitutions before being trimmed; the order matters, so the steps are
# kept in one table.
#
# @param name [String, nil]
# @param opts [Hash] opts[:suppress] is an optional list of extra words to
#   remove, combined with the configured list by #suppression_re
# @return [String, nil] the cleaned name, or name itself when it is nil/false
def scrub(name, opts = {})
  supp_key = opts[:suppress]
  # The compiled suppression regexp is cached per suppression list, even when
  # there is no name to clean.
  @re_cache[supp_key] ||= suppression_re(supp_key)
  return name unless name

  steps = [
    [/^[ZX]{2,}/, ''],                   # leading run of two or more Z/X characters
    [@re_cache[supp_key], ''],           # suppressed words
    [/\b(JR|SR|II|III|IV)\b/i, ''],      # common suffixes
    [/\([^\)]*\)/, ''],                  # anything inside parentheses
    [/\[[^\]]*\]/, ''],                  # anything inside square brackets
    [/\./, ' '],                         # periods become spaces
    [/[_'"\&]/, ''],                     # underscores, quotes, ampersands
    [/,\s*$/, ''],                       # trailing comma
    [/ +/, ' ']                          # squeeze runs of spaces
  ]

  steps.inject(name.upcase) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }.strip
end
#scrub_and_squash(name, opts = {}) ⇒ Object
scrub as above, but as a final stage, convert the result to a single term (no spaces or hyphens between bits)
47 48 49 50 |
# File 'lib/namor/namor.rb', line 47
# Scrubs a name with #scrub and then collapses it to a single term by
# removing every space and hyphen.
#
# @param name [String, nil]
# @param opts [Hash] options forwarded to #scrub
# @return [String, nil] the squashed name, or the falsy #scrub result as-is
def scrub_and_squash(name, opts = {})
  scrubbed = scrub(name, opts)
  return scrubbed unless scrubbed

  scrubbed.gsub(/[- ]/, '')
end
#suppress(name, supplist) ⇒ Object
23 24 25 26 |
# File 'lib/namor/namor.rb', line 23
# Uppercases a name and removes every word matched by the suppression regexp
# for the given list.
#
# @param name [String, nil]
# @param supplist [Array<String>, nil] extra words to suppress; also the key
#   under which the compiled regexp is cached
# @return [String, nil] the uppercased name without suppressed words, or
#   name itself when it is nil/false
def suppress(name, supplist)
  # Compile (and cache) the pattern even when there is no name to process.
  pattern = (@re_cache[supplist] ||= suppression_re(supplist))
  return name unless name

  name.upcase.gsub(pattern, '')
end
#suppression_re(supp_list) ⇒ Object
12 13 14 15 16 17 18 19 20 21 |
# File 'lib/namor/namor.rb', line 12
# Builds the regexp that matches any suppressed word as a whole word.
#
# The configured :suppress list and the per-call list are combined; nil
# entries are skipped, one trailing period is dropped from each term and the
# terms are uppercased. Each term is escaped so it is matched literally:
# without that, a term such as "M.D." would also match "MAD", and a term
# containing an unbalanced "(" would raise RegexpError.
#
# @param supp_list [Array<String>, nil] extra words for this call
# @return [Regexp] \b(TERM1|TERM2|...)\b; with no terms the group is empty
#   and the regexp only ever matches empty strings
def suppression_re(supp_list)
  suppression_list = (@config[:suppress] || []) + (supp_list || [])

  alternatives = suppression_list.compact.map do |term|
    Regexp.escape(term.chomp('.').upcase)
  end

  Regexp.new('\b(' + alternatives.join('|') + ')\b')
end