Class: NumberNormalizer

Inherits:
Object
  • Object
show all
Defined in:
lib/number_normalizer.rb,
lib/number_normalizer/version.rb

Constant Summary collapse

# Lookup table from spelled-out English number words to their numeric
# values. The articles 'a' and 'an' are mapped to 1.
# Frozen so this shared table cannot be mutated at runtime.
NUM_WORD = {
  'zero'      => 0,
  'one'       => 1,
  'two'       => 2,
  'three'     => 3,
  'four'      => 4,
  'five'      => 5,
  'six'       => 6,
  'seven'     => 7,
  'eight'     => 8,
  'nine'      => 9,
  'ten'       => 10,
  'eleven'    => 11,
  'twelve'    => 12,
  'thirteen'  => 13,
  'fourteen'  => 14,
  'fifteen'   => 15,
  'sixteen'   => 16,
  'seventeen' => 17,
  'eighteen'  => 18,
  'nineteen'  => 19,
  'twenty'    => 20,
  'thirty'    => 30,
  'forty'     => 40,
  'fifty'     => 50,
  'sixty'     => 60,
  'seventy'   => 70,
  'eighty'    => 80,
  'ninety'    => 90,
  'a'         => 1,
  'an'        => 1
}.freeze
# Lookup table from scale words ('half', 'dozen', 'hundred', ...) to their
# numeric values. How these are combined with NUM_WORD values is decided by
# code outside this table.
# Frozen so this shared table cannot be mutated at runtime.
MULT_WORD = {
  'half'     => 0.5,
  'dozen'    => 12,
  'hundred'  => 100,
  'thousand' => 1_000,
  'million'  => 1_000_000,
  'billion'  => 1_000_000_000,
  'trillion' => 1_000_000_000_000
}.freeze
# Connector tokens ('-' and 'and'), each mapped to 0.
# NOTE(review): presumably these join number words ("twenty-one",
# "one hundred and five") without contributing a value — confirm against
# the parsing code.
# Frozen so this shared table cannot be mutated at runtime.
ADJ_WORD = {
  '-'   => 0,
  'and' => 0
}.freeze
# Version string of the library; frozen so it cannot be mutated in place.
VERSION = "0.1.1".freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ NumberNormalizer

Returns a new instance of NumberNormalizer.



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/number_normalizer.rb', line 57

# Stores +text+, sets up the regex source fragments used for digit
# detection, then invokes find_all_digits and find_all_words (both defined
# elsewhere in the class).
#
# @param text the input to scan (presumably a String — confirm against callers)
def initialize(text)
  @text = text
  @token = ' '
  @text_array = []

  # Regex sources, kept as plain strings, for the supported digit layouts:
  # bare integers, decimals with and without a leading integer part, and
  # groups of three digits separated by whitespace or commas.
  @p_digits_only = '\d+'
  @p_floating_digits = '\d+\.\d+'
  @p_floating_digits_2 = '\.\d+'
  @p_space_sperated_digits = '\d{,3}(\s\d{3}){1,}(\.\d{1,3}){,1}(\s\d{3})*(\s\d{,3})?'
  @p_comma_sperated_digits = '\d{,3}(,\d{3}){1,}(\.\d{1,3}){,1}(,\d{3})*(,\d{,3})?'

  # Fragments for the start and end of a match.
  @p_begin = ''
  @p_end = '\W?(\s|$)'

  # Container for every number found in the text. The original note
  # describes each record as:
  #   { original_format => "1 thousand",
  #     digit_format    => "1000",
  #     word_format     => "one thousand",
  #     occurrence      => 1 }
  # NOTE(review): the actual keys are written by helpers not visible here —
  # confirm the layout against them.
  @matches = {}

  find_all_digits
  find_all_words
end

Instance Attribute Details

#matches ⇒ Object (readonly)

Returns the value of attribute matches.



7
8
9
# File 'lib/number_normalizer.rb', line 7

# Reader for @matches, the hash that #initialize creates empty before
# running its find_all_* helpers.
#
# @return [Hash] the current @matches hash
def matches
  @matches
end

#text ⇒ Object (readonly)

Returns the value of attribute text.



6
7
8
# File 'lib/number_normalizer.rb', line 6

# Reader for @text, the value handed to #initialize.
#
# @return the object stored in @text
def text
  @text
end

Instance Method Details

#digit_numbers ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/number_normalizer.rb', line 80

# Collects the distinct :digit_form values stored in the @matches records.
#
# convert_digit_string_to_numbers (defined elsewhere) is run first;
# presumably it fills in the :digit_form entries — confirm.
#
# @return [Array] unique :digit_form values, in first-seen order
def digit_numbers
  convert_digit_string_to_numbers

  # Only records that actually carry a :digit_form key contribute a value.
  with_digit_form = @matches.each_value.select { |record| record.key?(:digit_form) }
  with_digit_form.map { |record| record[:digit_form] }.uniq
end