Class: Geocoder::US::Address

Inherits:

Object

Inheritance chain: Object → Geocoder::US::Address

Defined in: lib/geocoder/us/address.rb

Overview

The Address class takes a US street address or place name and constructs a list of possible structured parses of the address string.

Instance Attribute Summary collapse

#city ⇒ Object

Returns the value of attribute city.
#number ⇒ Object

Returns the value of attribute number.
#plus4 ⇒ Object

Returns the value of attribute plus4.
#prenum ⇒ Object

Returns the value of attribute prenum.
#state ⇒ Object

Returns the value of attribute state.
#street ⇒ Object

Returns the value of attribute street.
#sufnum ⇒ Object

Returns the value of attribute sufnum.
#text ⇒ Object

Returns the value of attribute text.
#zip ⇒ Object

Returns the value of attribute zip.

Instance Method Summary collapse

#assign_text_to_address(text) ⇒ Object
#city_parts ⇒ Object
#clean(value) ⇒ Object

Removes any characters that aren’t strictly part of an address string.
#expand_numbers(string) ⇒ Object

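Expands a numeral, ordinal word or cardinal word found in a string into its possible equivalents (digits, ordinal word, cardinal word), using the Ordinals and Cardinals lookup tables. Strings without such a number, or whose number is 100 or more, are returned unchanged.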
#expand_streets(street) ⇒ Object
#initialize(text) ⇒ Address constructor

Takes an address or place name string as its sole argument.
#intersection? ⇒ Boolean
#parse ⇒ Object
#parse_number(regex_match, text) ⇒ Object
#parse_state(regex_match, text) ⇒ Object
#parse_zip(regex_match, text) ⇒ Object
#po_box? ⇒ Boolean
#remove_noise_words(strings) ⇒ Object
#street_parts ⇒ Object

Constructor Details

#initialize(text) ⇒ `Address`

Takes an address or place name string as its sole argument.

Raises:

(ArgumentError)

# File 'lib/geocoder/us/address.rb', line 28

# Builds an Address from free-form text or from a Hash of pre-split fields.
#
# text - a non-empty String holding an address or place name, which is
#        cleaned and then parsed; or a non-empty Hash (or Hash subclass),
#        which is handed to assign_text_to_address.
#
# Raises ArgumentError ("no text provided") when text is nil/false or empty.
def initialize(text)
  raise ArgumentError, "no text provided" unless text && !text.empty?
  # is_a? instead of an exact class comparison: Hash subclasses must take the
  # structured path too, otherwise they end up in clean, which calls strip.
  if text.is_a?(Hash)
    @text = ""
    assign_text_to_address text
  else
    @text = clean text
    parse
  end
end

Instance Attribute Details

#city ⇒ `Object`

Returns the value of attribute city.



23
24
25

# File 'lib/geocoder/us/address.rb', line 23

# Reader for @city. Both parse and assign_text_to_address store an Array of
# candidate city strings here.
def city
  @city
end

#number ⇒ `Object`

Returns the value of attribute number.



21
22
23

# File 'lib/geocoder/us/address.rb', line 21

# Reader for @number, the house number. parse and assign_text_to_address both
# fall back to an empty string when no number is found.
def number
  @number
end

#plus4 ⇒ `Object`

Returns the value of attribute plus4.



25
26
27

# File 'lib/geocoder/us/address.rb', line 25

# Reader for @plus4, the second value unpacked from the ZIP match in
# parse_zip (or text[:plus4] for Hash input). It is "" when no ZIP is present.
def plus4
  @plus4
end

#prenum ⇒ `Object`

Returns the value of attribute prenum.



21
22
23

# File 'lib/geocoder/us/address.rb', line 21

# Reader for @prenum, the first of the three values unpacked from the number
# match in parse_number (or text[:prenum] for Hash input).
def prenum
  @prenum
end

#state ⇒ `Object`

Returns the value of attribute state.



24
25
26

# File 'lib/geocoder/us/address.rb', line 24

# Reader for @state. parse_state stores the result of the State lookup for the
# matched state text; parse uses "" when no state was matched.
def state
  @state
end

#street ⇒ `Object`

Returns the value of attribute street.



22
23
24

# File 'lib/geocoder/us/address.rb', line 22

# Reader for @street, the Array of street strings produced by expand_streets.
def street
  @street
end

#sufnum ⇒ `Object`

Returns the value of attribute sufnum.



21
22
23

# File 'lib/geocoder/us/address.rb', line 21

# Reader for @sufnum, the third of the three values unpacked from the number
# match in parse_number (or text[:sufnum] for Hash input).
def sufnum
  @sufnum
end

#text ⇒ `Object`

Returns the value of attribute text.



20
21
22

# File 'lib/geocoder/us/address.rb', line 20

# Reader for @text. For String input this is the output of clean; for Hash
# input it starts as "" and is overwritten by the cleaned :address entry or
# by the :city entry when one is given.
def text
  @text
end

#zip ⇒ `Object`

Returns the value of attribute zip.



25
26
27

# File 'lib/geocoder/us/address.rb', line 25

# Reader for @zip, the first value unpacked from the ZIP match in parse_zip
# (or text[:postal_code] for Hash input). It is "" when no ZIP is present.
def zip
  @zip
end

Instance Method Details

#assign_text_to_address(text) ⇒ `Object`

# File 'lib/geocoder/us/address.rb', line 47

# Populates the address fields from a Hash of pre-split components.
#
# text - Hash read with the keys :address, :prenum, :sufnum, :street,
#        :number, :city, :region, :country, :state, :postal_code and :plus4.
#
# When :address is present it is cleaned and parsed as free-form text and the
# remaining keys are ignored.
def assign_text_to_address(text)
  unless text[:address].nil?
    @text = clean(text[:address])
    return parse
  end

  @street = []
  @prenum = text[:prenum]
  @sufnum = text[:sufnum]
  @street = text[:street].scan(Match[:street]) unless text[:street].nil?
  @number = ""
  unless @street.nil?
    if text[:number].nil?
      # No explicit number given: lowercase each street string, take its first
      # number match as @number and cut it (plus any leading separator) out.
      @street.map! do |candidate|
        candidate.downcase!
        @number = candidate.scan(Match[:number])[0].to_s
        candidate.sub!(@number, "")
        candidate.sub!(/^\s*,?\s*/o, "")
      end
    else
      @number = text[:number].to_s
    end
    @street = expand_streets(@street)
    street_parts
  end

  city_name = text[:city]
  if city_name.nil?
    @city = [""]
  else
    @city = [city_name]
    @text = city_name.to_s
  end

  # Precedence for the state field: :region, then :country, then :state.
  if !text[:region].nil?
    @state = text[:region]
    # Anything longer than two characters is looked up in State.
    @state = State[@state] if @state.length > 2
  elsif !text[:country].nil?
    @state = text[:country]
  elsif !text[:state].nil?
    @state = text[:state]
  end

  @zip = text[:postal_code]
  @plus4 = text[:plus4]
  # Without a ZIP the +4 is discarded as well.
  @zip = @plus4 = "" unless @zip
end

#city_parts ⇒ `Object`

# File 'lib/geocoder/us/address.rb', line 260

# Lists every contiguous run of words found in the @city strings.
#
# Runs are generated starting from the last word and moving backwards, each
# start position extending to the right. Candidates that are keys of Std_Abbr
# are dropped unless that would leave nothing at all.
#
# Returns an Array of unique Strings.
def city_parts
  candidates = []
  @city.each do |name|
    words = name.split(" ")
    last = words.length - 1
    last.downto(0) do |start|
      start.upto(last) do |stop|
        candidates |= [words[start..stop].join(" ")]
      end
    end
  end
  # Don't return strings that consist solely of abbreviations.
  # NOTE: the original author flagged this filter as breaking on "Prairie".
  meaningful = candidates.reject { |candidate| Std_Abbr.key?(candidate) }
  candidates = meaningful unless meaningful.empty?
  candidates.uniq
end

#clean(value) ⇒ `Object`

Removes any characters that aren’t strictly part of an address string.

# File 'lib/geocoder/us/address.rb', line 40

# Removes any characters that aren't strictly part of an address string.
#
# value - the raw address String.
#
# Returns a new String containing only letters, digits, spaces and the
# characters , ' & @ / - with every whitespace run collapsed to one space and
# no leading or trailing space.
def clean(value)
  # Turn tabs and newlines into spaces first; the filter below only keeps the
  # space character and would otherwise delete them, gluing words together.
  spaced = value.gsub(/\s+/, " ")
  filtered = spaced.gsub(/[^a-z0-9 ,'&@\/-]+/i, "")
  # Deleting characters can leave doubled or edge spaces, so tidy up last.
  filtered.squeeze(" ").strip
end

#expand_numbers(string) ⇒ `Object`

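Expands a numeral, ordinal word or cardinal word found in a string into its possible equivalents (digits, ordinal word, cardinal word), using the Ordinals and Cardinals lookup tables. Strings without such a number, or whose number is 100 or more, are returned unchanged.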

# File 'lib/geocoder/us/address.rb', line 104

# Produces the spelling variants of the first number found in a string.
#
# The number is looked for, in order, as digits with an optional
# st/nd/rd/th suffix, as a match of Ordinals.regexp, then as a match of
# Cardinals.regexp.
#
# string - the String to expand.
#
# Returns an Array with three variants (digits, Ordinals[num], Cardinals[num]
# substituted for the matched token) when a number below 100 was found,
# otherwise a one-element Array holding the string itself.
def expand_numbers(string)
  token = nil
  value = nil
  if (found = /\b\d+(?:st|nd|rd|th)?\b/o.match(string))
    token = found[0]
    value = token.to_i
  elsif (found = Ordinals.regexp.match(string))
    token = found[0]
    value = Ordinals[token]
  elsif (found = Cardinals.regexp.match(string))
    token = found[0]
    value = Cardinals[token]
  end

  return [string] unless value && value < 100

  [value.to_s, Ordinals[value], Cardinals[value]].map do |replacement|
    string.sub(token, replacement)
  end
end

#expand_streets(street) ⇒ `Object`

# File 'lib/geocoder/us/address.rb', line 207

# Expands a list of street strings into all of their known spelling variants.
#
# street - Array of Strings. Its elements are stripped in place before
#          expansion.
#
# Each string is joined by its Name_Abbr substitutions, then the combined list
# by its Std_Abbr substitutions; every entry is then run through
# expand_numbers, lowercased and de-duplicated.
#
# Returns a new Array of Strings, or [] when street is empty or its first
# element is nil.
def expand_streets(street)
  return [] if street.empty? || street[0].nil?

  street.map!(&:strip)
  with_names = street | street.map { |name| name.gsub(Name_Abbr.regexp) { |abbr| Name_Abbr[abbr] } }
  with_types = with_names | with_names.map { |name| name.gsub(Std_Abbr.regexp) { |abbr| Std_Abbr[abbr] } }
  with_types.map { |name| expand_numbers(name) }.flatten.map(&:downcase).uniq
end

#intersection? ⇒ `Boolean`

Returns:

(Boolean)



286
287
288

# File 'lib/geocoder/us/address.rb', line 286

# Tests the stored text against Match[:at].
#
# Returns whatever `match` returns - presumably a MatchData (truthy) on a hit
# and nil otherwise, assuming Match[:at] is a Regexp - rather than a strict
# true/false.
def intersection?
  Match[:at].match @text
end

#parse ⇒ `Object`

# File 'lib/geocoder/us/address.rb', line 152

# Splits @text into zip, state, number, street and city fields.
#
# Works on a lowercased copy of @text: each recognised component is matched
# with the corresponding Match pattern, stored, and then cut out of the
# working copy by a parse_* helper before the next component is looked for.
# The order (zip, state, number, then street and city on what is left) is
# therefore significant.
def parse
  text = @text.clone.downcase

  # Take the last ZIP match. $& holds the text of the most recent match made
  # by scan, which parse_zip removes from the working copy.
  @zip = text.scan(Match[:zip])[-1]
  if @zip
    text = parse_zip($&, text) 
  else
    @zip = @plus4 = ""
  end
  
  # Same approach for the state: last match wins.
  @state = text.scan(Match[:state])[-1]
  if @state
    text = parse_state($&, text)
  else
    @full_state = ""
    @state = ""
  end
  
  # NOTE(review): [0] selects the FIRST match while $& holds the text of the
  # LAST one; they agree only if Match[:number] can match at most once in
  # text - confirm against the Match definition.
  @number = text.scan(Match[:number])[0]
  # FIXME: 230 Fish And Game Rd, Hudson NY 12534
  if @number # and not intersection?
    text = parse_number($&, text)
  else
    @prenum = @number = @sufnum = ""
  end

  # FIXME: special case: Name_Abbr gets a bit aggressive
  # about replacing St with Saint. exceptional case:
  # Sault Ste. Marie

  # FIXME: PO Box should geocode to ZIP
  @street = text.scan(Match[:street])
  @street = expand_streets(@street)
  # SPECIAL CASE: 1600 Pennsylvania 20050
  # With no street found, fall back to the matched state text whenever the
  # State lookup changed it.
  @street << @full_state if @street.empty? and @state.downcase != @full_state.downcase      
 
  # Only the last city match is kept, together with its Name_Abbr expansion.
  @city = text.scan(Match[:city])
  if !@city.empty?
    @city = [@city[-1].strip]
    add = @city.map {|item| item.gsub(Name_Abbr.regexp) {|m| Name_Abbr[m]}} 
    @city |= add
    @city.map! {|s| s.downcase}
    @city.uniq!
  else
    @city = []
  end

  # SPECIAL CASE: no city, but a state with the same name. e.g. "New York"
  @city << @full_state if @state.downcase != @full_state.downcase

  # SPECIAL CASE: if given a single city string, and it's not the
  # same as the street string, remove it from the street parts
  # NOTE(review): this goes through the city= writer, which is not shown
  # here; presumably that writer performs the removal - confirm.
  self.city= @city if @city.length == 1 and @city != @street
end

#parse_number(regex_match, text) ⇒ `Object`

# File 'lib/geocoder/us/address.rb', line 143

# Cuts the matched house number out of the working text and unpacks @number.
#
# regex_match - the matched String to remove (its first occurrence is used).
# text        - the working String; modified in place.
#
# @number is expected to hold three captures, which are stripped (nil is left
# as nil) and assigned to @prenum, @number and @sufnum.
#
# Returns text.
def parse_number(regex_match, text)
  # FIXME: What if this string appears twice?
  start = text.index(regex_match)
  finish = start + regex_match.length
  text[start...finish] = ""
  # Drop the separator left at the front of the string.
  text.sub!(/^\s*,?\s*/o, "")
  parts = @number.map { |part| part && part.strip }
  @prenum, @number, @sufnum = parts
  text
end

#parse_state(regex_match, text) ⇒ `Object`

# File 'lib/geocoder/us/address.rb', line 134

# Cuts the matched state out of the working text and resolves @state.
#
# regex_match - the matched String to remove (its last occurrence is used).
# text        - the working String; modified in place.
#
# The stripped first element of @state is kept in @full_state (special case:
# New York) and @state is replaced by the State lookup for it.
#
# Returns text.
def parse_state(regex_match, text)
  start = text.rindex(regex_match)
  finish = start + regex_match.length
  text[start...finish] = ""
  # Drop the separator left at the end of the string.
  text.sub!(/\s*,?\s*$/o, "")
  @full_state = @state[0].strip
  @state = State[@full_state]
  text
end

#parse_zip(regex_match, text) ⇒ `Object`

# File 'lib/geocoder/us/address.rb', line 126

# Cuts the matched ZIP code out of the working text and unpacks @zip.
#
# regex_match - the matched String to remove (its last occurrence is used).
# text        - the working String; modified in place.
#
# @zip is expected to hold the captures of the ZIP match; they are stripped
# and assigned to @zip and @plus4.
#
# Returns text.
def parse_zip(regex_match, text)
  idx = text.rindex(regex_match)
  text[idx...idx + regex_match.length] = ""
  # Drop the separator left at the end of the string.
  text.sub!(/\s*,?\s*$/o, "")
  # A capture that did not participate in the match is nil; to_s turns it
  # into "" (the same value parse uses when there is no ZIP at all) instead
  # of raising NoMethodError on nil.strip.
  @zip, @plus4 = @zip.map { |s| s.to_s.strip }
  text
end

#po_box? ⇒ `Boolean`

Returns:

(Boolean)



282
283
284

# File 'lib/geocoder/us/address.rb', line 282

# Tests the stored text against Match[:po_box].
#
# Returns whatever `match` returns - presumably a MatchData (truthy) on a hit
# and nil otherwise, assuming Match[:po_box] is a Regexp - rather than a
# strict true/false.
def po_box?
  Match[:po_box].match @text
end

#remove_noise_words(strings) ⇒ `Object`

# File 'lib/geocoder/us/address.rb', line 239

# Strips leading/trailing directionals and street-type words from each string.
#
# strings - Array of Strings; neither the Array nor its elements are modified.
#
# For every string, in this order: a leading Directional, a trailing
# Directional, a leading Prefix_Type and a trailing Suffix_Type are removed
# (case-insensitively) together with the adjacent whitespace. Strings that
# end up empty are discarded.
#
# Returns the stripped strings, or the original `strings` when stripping
# would leave nothing.
#
# NOTE: the earlier version passed an abbreviation-filtering block to
# Array#empty?, which ignores blocks, so that filter never ran. It is removed
# here rather than activated; street_parts separately handles the case where
# every remaining string is an abbreviation.
# NOTE(review): each pattern is built as "^" + source; this assumes the
# sources are self-contained (grouped) alternations - confirm.
def remove_noise_words(strings)
  # Single-quoted '\s*' on purpose: inside double quotes "\s" is a literal
  # space, so the patterns used to match spaces only.
  prefix = Regexp.new("^" + Prefix_Type.regexp.source + '\s*', Regexp::IGNORECASE)
  suffix = Regexp.new('\s*' + Suffix_Type.regexp.source + "$", Regexp::IGNORECASE)
  predxn = Regexp.new("^" + Directional.regexp.source + '\s*', Regexp::IGNORECASE)
  sufdxn = Regexp.new('\s*' + Directional.regexp.source + "$", Regexp::IGNORECASE)

  good_strings = strings.map do |s|
    s.gsub(predxn, "").gsub(sufdxn, "").gsub(prefix, "").gsub(suffix, "")
  end
  good_strings.reject!(&:empty?)

  good_strings.empty? ? strings : good_strings
end

#street_parts ⇒ `Object`

# File 'lib/geocoder/us/address.rb', line 224

# Lists the searchable word runs of the @street strings.
#
# Every contiguous run of whitespace-delimited words is collected, the list is
# passed through remove_noise_words, and @number is appended when every
# remaining entry is a key of Std_Abbr or Name_Abbr (which includes the case
# where nothing remains).
#
# Returns an Array of unique Strings.
def street_parts
  pieces = []
  # Collect every run words[first..last] of each street string.
  @street.each do |name|
    words = name.split(" ")
    words.each_index do |first|
      (first...words.length).each do |last|
        pieces |= [words[first..last].join(" ")]
      end
    end
  end
  pieces = remove_noise_words(pieces)

  # Try a simpler case of adding the @number in case everything is an abbr.
  only_abbrs = pieces.all? { |piece| Std_Abbr.key?(piece) || Name_Abbr.key?(piece) }
  pieces += [@number] if only_abbrs
  pieces.uniq
end

Class: Geocoder::US::Address

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ Address

Instance Attribute Details

#city ⇒ Object

#number ⇒ Object

#plus4 ⇒ Object

#prenum ⇒ Object

#state ⇒ Object

#street ⇒ Object

#sufnum ⇒ Object

#text ⇒ Object

#zip ⇒ Object

Instance Method Details

#assign_text_to_address(text) ⇒ Object

#city_parts ⇒ Object

#clean(value) ⇒ Object

#expand_numbers(string) ⇒ Object

#expand_streets(street) ⇒ Object

#intersection? ⇒ Boolean

#parse ⇒ Object

#parse_number(regex_match, text) ⇒ Object

#parse_state(regex_match, text) ⇒ Object

#parse_zip(regex_match, text) ⇒ Object

#po_box? ⇒ Boolean

#remove_noise_words(strings) ⇒ Object

#street_parts ⇒ Object