Module: GyomuRuby::WordNormalizer

Extended by:
WordNormalizer
Included in:
WordNormalizer
Defined in:
lib/gyomu_ruby/word_normalizer.rb

Instance Method Summary

Instance Method Details

#normalize_symbol_unicode(word) ⇒ Object



20
21
22
23
24
# File 'lib/gyomu_ruby/word_normalizer.rb', line 20

# Normalizes dash-like and wave-dash symbols in +word+.
#
# First applies normalize_unconversion_symbol_unicode, then additionally
# replaces U+002D (hyphen-minus), U+FF0D (fullwidth hyphen-minus),
# U+2015 (horizontal bar) and U+2500 (box drawings light horizontal) with
# U+30FC (katakana-hiragana prolonged sound mark).
#
# The caller's string is never modified: the helper is handed a copy and the
# final replacement is non-destructive, so frozen strings are accepted too.
#
# @param word [String] text to normalize
# @return [String] a new, normalized string
def normalize_symbol_unicode(word)
  # dup so that the argument stays untouched even if the helper edits the
  # string it receives in place.
  normalize_unconversion_symbol_unicode(word.dup)
    .gsub(/\u002d|\uff0d|\u2015|\u2500/, "\u30fc")
end

#normalize_unconversion_symbol_unicode(word) ⇒ Object



13
14
15
16
17
18
# File 'lib/gyomu_ruby/word_normalizer.rb', line 13

# Replaces a fixed set of dash and wave-dash code points in +word+.
#
# * U+2212 (minus sign), U+2012 (figure dash), U+2013 (en dash) and
#   U+2014 (em dash) become U+30FC (katakana-hiragana prolonged sound mark).
# * U+301C (wave dash) becomes U+FF5E (fullwidth tilde).
#
# Uses non-destructive gsub so the caller's string is left unmodified and
# frozen strings (e.g. literals under frozen_string_literal) are accepted.
#
# @param word [String] text to normalize
# @return [String] a new string with the replacements applied
def normalize_unconversion_symbol_unicode(word)
  word
    .gsub(/\u2212|\u2012|\u2013|\u2014/, "\u30fc")
    .gsub(/\u301c/, "\uff5e")
end

#to_array(words) ⇒ Object



44
45
46
47
48
49
50
# File 'lib/gyomu_ruby/word_normalizer.rb', line 44

# Coerces +words+ into an array.
#
# For a Hash, returns the keys (converted with to_s) of every entry whose
# value is not blank?; anything else is passed through Kernel#Array.
#
# @param words [Hash, Array, Object, nil] value to coerce
# @return [Array] key strings for a Hash, otherwise Array(words)
def to_array(words)
  return Array(words) unless words.is_a?(Hash)

  words.each_with_object([]) do |(key, value), kept|
    kept << key.to_s unless value.blank?
  end
end

#to_date(word) ⇒ Object



34
35
36
37
38
39
40
41
42
# File 'lib/gyomu_ruby/word_normalizer.rb', line 34

# Converts +word+ to a date-like value.
#
# Resolution order:
# 1. +word+ itself when it answers acts_like?(:date) and acts_like_date?.
# 2. word.to_date when +word+ responds to to_date (errors raised by that
#    call are not rescued here).
# 3. Date.parse(word.to_s), yielding nil if that raises a StandardError.
#
# @param word [Object] value to convert
# @return [Object, nil] the date-like result, or nil when parsing fails
def to_date(word)
  return word if word.acts_like?(:date) && word.acts_like_date?
  return word.to_date if word.respond_to?(:to_date)

  begin
    Date.parse(word.to_s)
  rescue StandardError
    nil
  end
end

#to_doublewidth_hiragana(word) ⇒ Object Also known as: to_kana



8
9
10
# File 'lib/gyomu_ruby/word_normalizer.rb', line 8

# Runs +word+ (or an empty string when +word+ is nil/false) through
# Moji.han_to_zen restricted to Moji::KANA, then Moji.kata_to_hira, and
# finally normalize_symbol_unicode.
#
# NOTE(review): judging by the method names this yields full-width hiragana;
# confirm against the Moji gem documentation.
#
# @param word [String, nil] text to convert
# @return [Object] the result of normalize_symbol_unicode
def to_doublewidth_hiragana(word)
  source = word || ''
  widened = Moji.han_to_zen(source, Moji::KANA)
  hiragana = Moji.kata_to_hira(widened)
  normalize_symbol_unicode(hiragana)
end

#to_hankaku(word) ⇒ Object

XXX: Track down the details of why 0x20 (the ASCII space character) gets broken. (原文: 0x20が壊れる詳細を追いかける)



53
54
55
56
# File 'lib/gyomu_ruby/word_normalizer.rb', line 53

# Returns '' for a blank? +word+. Otherwise passes +word+ through
# normalize_symbol_unicode, Moji.han_to_zen (limited to Moji::SYMBOL) and
# Moji.zen_to_han, then replaces every U+FF70 (halfwidth prolonged sound
# mark) in the result with U+002D (hyphen-minus).
#
# NOTE(review): the accompanying documentation flags an unresolved issue
# where 0x20 (space) gets broken by this conversion - still to be
# investigated.
#
# @param word [String, nil] text to convert
# @return [String] converted text, or '' when +word+ is blank
def to_hankaku(word)
  return '' if word.blank?

  normalized = normalize_symbol_unicode(word)
  widened_symbols = Moji.han_to_zen(normalized, Moji::SYMBOL)
  narrowed = Moji.zen_to_han(widened_symbols)
  narrowed.gsub(/\uff70/) { "\u002d" }
end

#to_numeric(word) ⇒ Object



58
59
60
61
# File 'lib/gyomu_ruby/word_normalizer.rb', line 58

# Produces a string for +word+.
#
# A Numeric is returned as word.to_s unchanged. Any other value (nil/false
# are treated as '') is passed through to_hankaku, and only the characters
# matching \d in that result are kept and concatenated.
#
# @param word [Numeric, String, nil] value to convert
# @return [String] word.to_s for a Numeric, otherwise the extracted digits
def to_numeric(word)
  if word.is_a?(Numeric)
    word.to_s
  else
    half_width = to_hankaku(word || '')
    half_width.scan(/\d/).join
  end
end

#to_time(word) ⇒ Object



26
27
28
29
30
31
32
# File 'lib/gyomu_ruby/word_normalizer.rb', line 26

# Converts +word+ to a time-like value.
#
# Returns +word+ itself when it answers acts_like?(:time) and
# acts_like_time?; otherwise parses word.to_s with Time.parse and yields nil
# if that raises a StandardError.
#
# @param word [Object] value to convert
# @return [Object, nil] the time-like result, or nil when parsing fails
def to_time(word)
  return word if word.acts_like?(:time) && word.acts_like_time?

  begin
    Time.parse(word.to_s)
  rescue StandardError
    nil
  end
end