Module: TextUtils::AddressHelper

Included in:
TextUtils
Defined in:
lib/textutils/helper/address_helper.rb

Instance Method Summary collapse

Instance Method Details

#find_city_in_addr(address, country_key) ⇒ Object



150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/textutils/helper/address_helper.rb', line 150

def find_city_in_addr( address, country_key )
  # Look up the city in an address string (lines separated by '//').
  #
  #  address     - address string; nil or empty yields nil
  #  country_key - country code string forwarded to the postal-code lookup
  #
  # Returns the city string of the first rule that matches, or nil if
  #  none of the known patterns match.

  # todo: use blank?
  return nil if address.nil? || address.empty?

  # generic rule first (no postal code/zip code or state);
  #  fall back to the country-specific postal code rules
  find_city_in_addr_without_postal_code( address ) ||
    find_city_in_addr_with_postal_code( address, country_key )
end

#find_city_in_addr_with_postal_code(address, country_key) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/textutils/helper/address_helper.rb', line 91

def find_city_in_addr_with_postal_code( address, country_key )
  # Find the city in an address (lines separated by '//') by looking for
  #  the line that carries a country-specific postal code pattern.
  #
  #  address     - address string; nil or empty yields nil
  #  country_key - 'at', 'be', 'de', 'cz', 'sk' or 'us';
  #                any other value yields nil (unsupported for now)
  #
  # Returns the first matching line with the postal code part cut off,
  #  or nil if no line matches.

  # todo: use blank?
  return nil if address.nil? || address.empty?

  parts = address.split( '//' )

  patterns =
    case country_key
    when 'at', 'be'
      #  - 2018 Antwerpen or 2870 Breendonk-Puurs (be)
      #  line must start w/ four digit postal code
      [/^[0-9]{4}\s+/]
    when 'de'
      #  line must start w/ five digit postal code
      [/^[0-9]{5}\s+/]
    when 'cz', 'sk'
      #  - 284 15  Kutná Hora or  288 25  Nymburk (cz)
      #  - 036 42  Martin     or  974 05  Banská Bystrica (sk)
      [/^[0-9]{3}\s[0-9]{2}\s+/]
    when 'us'
      #  - Brooklyn | NY 11249  or Brooklyn, NY 11249
      #  - Brooklyn | NY   or Brooklyn, NY
      [/\s*[|,]\s+[A-Z]{2}\s+[0-9]{5}\s*$/,
       /\s*[|,]\s+[A-Z]{2}\s*$/]
    else
      []   # unsupported country/address schema for now; sorry
    end

  parts.each do |part|
    candidate = part.strip
    # for every line try the patterns in the order listed; first hit wins
    hit = patterns.find { |pattern| candidate =~ pattern }
    # cut off the postal code (and state) part; assume rest is city
    return candidate.sub( hit, '' ) if hit
  end

  nil   # sorry nothing found
end

#find_city_in_addr_without_postal_code(address) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/textutils/helper/address_helper.rb', line 42

def find_city_in_addr_without_postal_code( address )
  # Generic (not country-specific) city lookup for addresses without
  #  postal code/zip code or state.
  #
  # The address (lines separated by '//') must hold exactly one
  #  non-blank line, e.g.
  #    // London   or
  #    London //
  #  and that line is assumed to be the city.
  #
  # Returns the stripped line, or nil if the address is nil/empty,
  #  has zero or more than one non-blank line, or the line does not
  #  look like a city name (see checks below).

  # todo: use blank?
  return nil if address.nil? || address.empty?

  # note:   London //   gets split into an array of size 1 e.g. ['London '];
  #   stripping and dropping blank lines covers both variants
  candidates = address.split( '//' ).map( &:strip ).reject( &:empty? )
  return nil unless candidates.size == 1

  candidate = candidates.first

  # city may NOT include numbers (assumes zip/postal code etc.),
  #  pipe (|) or comma (,)
  return nil if candidate =~ /[0-9|,]/

  # two or more uppercase letters in a row e.g. TX NY etc.
  #  open question: are there cities with two uppercase letters in a row?
  return nil if candidate =~ /[A-Z]{2,}/

  candidate   # bingo!!! assume candidate line is a city name
end

#normalize_addr(old_address, country_key = nil) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/textutils/helper/address_helper.rb', line 7

def normalize_addr( old_address, country_key=nil )
  # Move the postal code/city line of a two-line address (lines
  #  separated by '//') to the front.
  #
  # for now only checks german (de) 5-digit zip code and
  #                    austrian (at) 4-digit zip code
  #
  #  e.g.  Alte Plauener Straße 24 // 95028 Hof  becomes
  #        95028 Hof // Alte Plauener Straße 24
  #
  #  old_address - address string; nil or empty is passed through as is
  #  country_key - 'at' or 'de'; nil is deprecated (prints a notice and
  #                returns old_address untouched)
  #
  # Returns the reordered address ("line2 // line1", both stripped) if
  #  the second line starts with the country's postal code; otherwise
  #  returns old_address unchanged.

  if country_key.nil?
    puts "TextUtils.normalize_addr drepreciated call - country_key now required; please add !!"
    return old_address
  end

  # do NOT process nil or empty addresses (avoids calling split on nil);
  #  just pass through
  return old_address if old_address.nil? || old_address.empty?

  lines = old_address.split( '//' )

  # only two-line addresses are candidates for switching lines
  return old_address unless lines.size == 2

  line1 = lines[0].strip
  line2 = lines[1].strip

  postal_code_regex =
    case country_key
    when 'at' then /^[0-9]{4}\s+/   # four digits postal code
    when 'de' then /^[0-9]{5}\s+/   # five digits postal code
    end

  if postal_code_regex && line2 =~ postal_code_regex
    "#{line2} // #{line1}"
  else
    old_address   # default - do nothing - just pass through
  end
end