Class: Numerizer

Inherits:
Object
  • Object
show all
Defined in:
lib/numerizer.rb

Constant Summary collapse

# Pairs of [regex source, replacement] for number words that map straight to
# digits. Each source string is interpolated into a case-insensitive Regexp,
# so it may contain regex syntax, and the replacement may contain
# back-references such as \1.
#
# Order matters: the teens come first so that e.g. "fourteen" is consumed
# before the plain "four" pattern is tried.
#
# Frozen (table and rows) so the shared constant cannot be mutated by accident.
DIRECT_NUMS =
[
  ['eleven', '11'],
  ['twelve', '12'],
  ['thirteen', '13'],
  ['fourteen', '14'],
  ['fifteen', '15'],
  ['sixteen', '16'],
  ['seventeen', '17'],
  ['eighteen', '18'],
  ['nineteen', '19'],
  ['ninteen', '19'], # common misspelling
  ['zero', '0'],
  ['one', '1'],
  ['two', '2'],
  ['three', '3'],
  # The trailing (\W|$) group makes these match the bare word only, so that
  # "four" matches but "fourty" does not; the consumed character is put back
  # via \1 in the replacement.
  ['four(\W|$)', '4\1'],
  ['five', '5'],
  ['six(\W|$)', '6\1'],
  ['seven(\W|$)', '7\1'],
  ['eight(\W|$)', '8\1'],
  ['nine(\W|$)', '9\1'],
  ['ten', '10'],
  # Intent: a standalone 'a' means 1, except at the end of the text where that
  # makes no sense.
  # NOTE(review): inside the [...] character class, \b is a backspace and ^ / $
  # are literal characters, so this only matches 'a' followed by one of those
  # three characters -- confirm whether that is intended.
  ['\ba[\b^$]', '1']
].each(&:freeze).freeze
# Pairs of [word, value] for the multiples of ten from twenty to ninety.
# The word is interpolated into a case-insensitive Regexp; the Integer value
# is what gets added to a following single digit ("twenty" + 5 => 25).
#
# Frozen (table and rows) so the shared constant cannot be mutated by accident.
TEN_PREFIXES =
[ ['twenty', 20],
  ['thirty', 30],
  ['forty', 40],
  ['fourty', 40], # common misspelling
  ['fifty', 50],
  ['sixty', 60],
  ['seventy', 70],
  ['eighty', 80],
  ['ninety', 90]
].each(&:freeze).freeze
# Pairs of [word, multiplier] for the magnitude words, in ascending order.
# The word is interpolated into a case-insensitive Regexp; the Integer is
# multiplied by the number that precedes the word ("5 hundred" => 500).
#
# Frozen (table and rows) so the shared constant cannot be mutated by accident.
BIG_PREFIXES =
[ ['hundred', 100],
  ['thousand', 1000],
  ['million', 1_000_000],
  ['billion', 1_000_000_000],
  ['trillion', 1_000_000_000_000],
].each(&:freeze).freeze

Class Method Summary collapse

Class Method Details

.numerize(string) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/numerizer.rb', line 48

# Rewrites spelled-out English numbers in +string+ as digits and returns the
# result. The argument is not modified; all work happens on a copy.
#
# The passes run in this order:
# 1. normalise runs of spaces and non-numeric hyphens to a single space, and
#    hide "a half" behind the placeholder "haAlf";
# 2. apply the DIRECT_NUMS substitutions;
# 3. apply TEN_PREFIXES, folding in a directly following single digit;
# 4. apply BIG_PREFIXES as multipliers, calling andition after each one;
# 5. turn "<n> (and) haAlf" into n + 0.5.
#
# Every number produced by a pass is tagged with a "<num>" marker so that later
# passes can recognise it; the markers are stripped from the returned string.
#
# @param string [String] text that may contain number words
# @return [String] a new string with the recognised numbers in digit form
def self.numerize(string)
  string = string.dup

  # preprocess
  # Collapses each run of spaces to one space and turns a hyphen between two
  # non-digit characters into a space (mutilates hyphenated-words, which
  # shouldn't matter for date extraction).
  string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2')
  # Take the 'a' out so it doesn't turn into a 1; save the half for the end.
  string.gsub!(/a half/, 'haAlf')

  # easy/direct replacements
  DIRECT_NUMS.each do |dn|
    string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
  end

  # ten, twenty, etc.
  # First pass: a tens word followed by a tagged single digit ("twenty <num>5")
  # collapses to their sum. $1 is nil when no digit follows; nil.to_i is 0.
  TEN_PREFIXES.each do |tp|
    string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (tp[1] + $1.to_i).to_s }
  end

  # Second pass: any tens word still left stands on its own.
  TEN_PREFIXES.each do |tp|
    string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
  end

  # hundreds, thousands, millions, etc.
  BIG_PREFIXES.each do |bp|
    string.gsub!(/(?:<num>)?(\d*)( *)#{bp[0]}/i) do
      digits, gap = $1, $2
      if digits.empty?
        # A bare magnitude word ("hundred") means one of that magnitude, not
        # zero of it. The spaces matched in front of the word belong to the
        # surrounding text, so they are put back.
        "#{gap}<num>#{bp[1]}"
      else
        "<num>#{bp[1] * digits.to_i}"
      end
    end
    # andition is defined elsewhere in this class; presumably it merges
    # "<num>A and <num>B" style sequences -- verify against its definition.
    andition(string)
  end

  # fractional addition
  # Kept separate from the previous block because float addition complicates
  # the strings (extraneous .0's and such).
  string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }
  # Any placeholder that was not attached to a number goes back to its
  # original text instead of leaking into the result.
  string.gsub!(/haAlf/, 'a half')

  string.gsub(/<num>/, '')
end