Module: GyomuRuby::WordNormalizer
- Extended by:
- WordNormalizer
- Included in:
- WordNormalizer
- Defined in:
- lib/gyomu_ruby/word_normalizer.rb
Instance Method Summary collapse
- #normalize_symbol_unicode(word) ⇒ Object
- #normalize_unconversion_symbol_unicode(word) ⇒ Object
- #to_array(words) ⇒ Object
- #to_date(word) ⇒ Object
- #to_doublewidth_hiragana(word) ⇒ Object (also: #to_kana)
- #to_hankaku(word) ⇒ Object
XXX 0x20が壊れる詳細を追いかける.
- #to_numeric(word) ⇒ Object
- #to_time(word) ⇒ Object
Instance Method Details
#normalize_symbol_unicode(word) ⇒ Object
20 21 22 23 24 |
# File 'lib/gyomu_ruby/word_normalizer.rb', line 20
#
# Replaces dash-like characters with the katakana prolonged sound mark
# (U+30FC), after first applying normalize_unconversion_symbol_unicode.
#
# Characters handled directly here: U+002D (hyphen-minus), U+FF0D
# (fullwidth hyphen-minus), U+2015 (horizontal bar) and U+2500 (box
# drawings light horizontal).
#
# The caller's string is never modified: the work is done on a copy, so
# frozen strings are accepted and the argument keeps its original content.
#
# @param word [String] text to normalize
# @return [String] a new, normalized string
def normalize_symbol_unicode(word)
  # `dup` shields the argument in case the delegated normalizer edits the
  # string it receives in place.
  normalize_unconversion_symbol_unicode(word.dup)
    .gsub(/\u002d|\uff0d|\u2015|\u2500/) { "\u30fc" }
end
#normalize_unconversion_symbol_unicode(word) ⇒ Object
13 14 15 16 17 18 |
# File 'lib/gyomu_ruby/word_normalizer.rb', line 13
#
# Replaces a fixed set of Unicode symbols with substitutes:
#
# * U+2212 (minus sign), U+2012 (figure dash), U+2013 (en dash) and
#   U+2014 (em dash) become U+30FC (katakana prolonged sound mark).
# * U+301C (wave dash) becomes U+FF5E (fullwidth tilde).
#
# NOTE(review): the method name suggests these are characters that fail to
# convert to some target encoding; that is not visible here -- confirm.
#
# The caller's string is never modified; a new string is returned, so
# frozen strings are accepted.
#
# @param word [String] text to normalize
# @return [String] a new string with the substitutions applied
def normalize_unconversion_symbol_unicode(word)
  # Pure character-for-character mapping, so a single non-destructive `tr`
  # pass replaces the two in-place regexp substitutions.
  word.tr("\u2212\u2012\u2013\u2014\u301c", "\u30fc\u30fc\u30fc\u30fc\uff5e")
end
#to_array(words) ⇒ Object
44 45 46 47 48 49 50 |
# File 'lib/gyomu_ruby/word_normalizer.rb', line 44
#
# Coerces +words+ into an array.
#
# For a Hash, returns the keys (as strings) of every entry whose value is
# not blank. Anything else is passed through Kernel#Array.
#
# @param words [Hash, Array, Object, nil] value to coerce
# @return [Array]
def to_array(words)
  return Array(words) unless words.is_a?(Hash)

  words.each_with_object([]) do |(key, value), selected|
    selected << key.to_s unless value.blank?
  end
end
#to_date(word) ⇒ Object
34 35 36 37 38 39 40 41 42 |
# File 'lib/gyomu_ruby/word_normalizer.rb', line 34
#
# Best-effort conversion of +word+ to a date.
#
# * A value that already acts like a date is returned unchanged.
# * A value that responds to +to_date+ is converted with it.
# * Anything else is stringified and handed to Date.parse.
#
# Any StandardError raised by either conversion path yields nil, so a
# failing +to_date+ behaves the same as a failing Date.parse.
#
# @param word [Object] value to convert
# @return [Object, nil] the date-like value, or nil when conversion fails
def to_date(word)
  return word if word.acts_like?(:date) && word.acts_like_date?

  begin
    word.respond_to?(:to_date) ? word.to_date : Date.parse(word.to_s)
  rescue StandardError
    # Deliberate best-effort: unparseable input is reported as nil.
    nil
  end
end
#to_doublewidth_hiragana(word) ⇒ Object Also known as: to_kana
8 9 10 |
# File 'lib/gyomu_ruby/word_normalizer.rb', line 8
#
# Runs +word+ (nil is treated as an empty string) through three steps:
# Moji.han_to_zen restricted to Moji::KANA, then Moji.kata_to_hira, then
# normalize_symbol_unicode.
#
# NOTE(review): going by the Moji method names this presumably widens
# half-width kana and turns katakana into hiragana -- confirm against the
# Moji documentation.
#
# @param word [String, nil] text to convert
# @return [Object] whatever normalize_symbol_unicode returns for the
#   converted text
def to_doublewidth_hiragana(word)
  source = word || ''
  widened = Moji.han_to_zen(source, Moji::KANA)
  hiragana = Moji.kata_to_hira(widened)
  normalize_symbol_unicode(hiragana)
end
#to_hankaku(word) ⇒ Object
XXX 0x20が壊れる詳細を追いかける
53 54 55 56 |
# File 'lib/gyomu_ruby/word_normalizer.rb', line 53
#
# Converts +word+ to its half-width form.
#
# Steps: normalize_symbol_unicode, Moji.han_to_zen restricted to
# Moji::SYMBOL, Moji.zen_to_han, and finally every U+FF70 (half-width
# katakana prolonged sound mark) is replaced with U+002D (hyphen-minus).
#
# Blank input yields an empty string. The caller's string is never
# modified: normalization runs on a copy, so frozen strings are accepted.
#
# XXX: investigate in detail why 0x20 (space) gets broken.
#
# @param word [String, nil] text to convert
# @return [String] the converted text, or '' for blank input
def to_hankaku(word)
  return '' if word.blank?

  # Normalize a copy: the normalizer may edit its argument in place, which
  # would otherwise rewrite the caller's hyphens as a side effect.
  normalized = normalize_symbol_unicode(word.dup)
  widened_symbols = Moji.han_to_zen(normalized, Moji::SYMBOL)
  Moji.zen_to_han(widened_symbols).gsub(/\uff70/) { "\u002d" }
end
#to_numeric(word) ⇒ Object
58 59 60 61 |
# File 'lib/gyomu_ruby/word_normalizer.rb', line 58
#
# Produces a string of digits from +word+.
#
# A Numeric is returned via +to_s+ as-is. Any other value (nil is treated
# as an empty string) goes through to_hankaku, and only the characters
# matching /\d/ are kept, in their original order.
#
# @param word [Numeric, String, nil] value to convert
# @return [String]
def to_numeric(word)
  if word.is_a?(Numeric)
    word.to_s
  else
    halfwidth = to_hankaku(word || '')
    halfwidth.scan(/\d/).join
  end
end
#to_time(word) ⇒ Object
26 27 28 29 30 31 32 |
# File 'lib/gyomu_ruby/word_normalizer.rb', line 26
#
# Best-effort conversion of +word+ to a time.
#
# A value that already acts like a time is returned unchanged. Anything
# else is stringified and handed to Time.parse; if that raises a
# StandardError the result is nil.
#
# @param word [Object] value to convert
# @return [Object, nil] the time-like value, or nil when parsing fails
def to_time(word)
  return word if word.acts_like?(:time) && word.acts_like_time?

  begin
    Time.parse(word.to_s)
  rescue StandardError
    nil
  end
end