Module: ZhongwenTools::Regex
- Extended by:
- Regex
- Included in:
- Regex
- Defined in:
- lib/zhongwen_tools/regex.rb,
lib/zhongwen_tools/regex/ruby18.rb
Instance Method Summary
-
#bopomofo ⇒ Object
Public: A Regex for bopomofo, a.k.a. Zhuyin Fuhao 注音符号.
- #capital_letters ⇒ Object
- #fullwidth ⇒ Object
- #lowercase_letters ⇒ Object
- #punc ⇒ Object
- #py ⇒ Object
- #py_tones ⇒ Object
- #pyn ⇒ Object
- #zh ⇒ Object
- #zh_numbers ⇒ Object
- #zh_punc ⇒ Object
Instance Method Details
#bopomofo ⇒ Object
Public: A Regex for bopomofo, a.k.a. Zhuyin Fuhao 注音符号.
Examples
bopomofo #=> <Regex>
Returns a Regex.
55 56 57 |
# File 'lib/zhongwen_tools/regex.rb', line 55
# Public: A Regex for bopomofo, a.k.a. Zhuyin Fuhao 注音符号.
#
# The character class enumerates 37 symbols, ㄅ through ㄩ, so the pattern
# matches exactly one such symbol wherever it occurs in a string.
#
# Examples
#
#   'ㄅ' =~ bopomofo  #=> 0
#   'b' =~ bopomofo   #=> nil
#
# Returns a Regexp.
def bopomofo
  /[ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ]/
end
#capital_letters ⇒ Object
20 21 22 |
# File 'lib/zhongwen_tools/regex.rb', line 20
# Public: A Regex with one capture group that matches any key of
# ZhongwenTools::UNICODE_CAPS.
#
# NOTE(review): UNICODE_CAPS is defined outside this listing; its keys are
# presumably the capital letters and its values their lowercase forms —
# confirm against the constant's definition.
#
# Returns a Regexp.
def capital_letters
  # Regexp.union builds an alternation of the keys.
  any_key = Regexp.union(ZhongwenTools::UNICODE_CAPS.keys)
  /(#{any_key})/
end
#fullwidth ⇒ Object
16 17 18 |
# File 'lib/zhongwen_tools/regex.rb', line 16
# Public: A Regex for fullwidth (全角) digits, Latin letters and a small set
# of fullwidth symbols.
#
# The character class covers the fullwidth forms of:
#
#   digits 0-9          U+FF10-U+FF19
#   capital letters A-Z U+FF21-U+FF3A
#   small letters a-z   U+FF41-U+FF5A
#   percent, full stop, colon, number sign, dollar, ampersand, plus,
#   hyphen-minus, solidus, reverse solidus, equals, semicolon, less-than
#   and greater-than signs
#
# Their ASCII counterparts are deliberately NOT matched.
#
# Examples
#
#   "\uFF21" =~ fullwidth  #=> 0
#   'A' =~ fullwidth       #=> nil
#
# Returns a Regexp.
def fullwidth
  # The code points are spelled as \u escapes on purpose: literal fullwidth
  # characters get folded to ASCII by compatibility normalisation, which
  # turns the class into one that matches halfwidth text and is not even a
  # valid regex literal (the '/' ends it early).
  /[\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF05\uFF0E\uFF1A\uFF03\uFF04\uFF06\uFF0B\uFF0D\uFF0F\uFF3C\uFF1D\uFF1B\uFF1C\uFF1E]/
end
#lowercase_letters ⇒ Object
24 25 26 |
# File 'lib/zhongwen_tools/regex.rb', line 24
# Public: A Regex with one capture group that matches any value of
# ZhongwenTools::UNICODE_CAPS.
#
# NOTE(review): UNICODE_CAPS is defined outside this listing; its values are
# presumably the lowercase forms of its capital-letter keys — confirm
# against the constant's definition.
#
# Returns a Regexp.
def lowercase_letters
  # Regexp.union builds an alternation of the values.
  any_value = Regexp.union(ZhongwenTools::UNICODE_CAPS.values)
  /(#{any_value})/
end
#punc ⇒ Object
32 33 34 |
# File 'lib/zhongwen_tools/regex.rb', line 32
# Public: A Regex for a single Western (non-CJK) punctuation character.
#
# The alternation covers these code points:
#
#   U+0021-U+0023   ! " #
#   U+0025-U+002A   % & ' ( ) *
#   U+002C-U+002F   , - . /
#   U+003A U+003B U+003F U+0040   : ; ? @
#   U+005B-U+005D U+005F U+007B U+007D   [ \ ] _ { }
#   U+00A1 U+00A7 U+00AB U+00B6 U+00B7 U+00BB U+00BF
#   U+037E U+0387
#
# Code points such as U+0024 ($), U+002B (+) and U+003C-U+003E (< = >) are
# not listed and therefore do not match.
#
# Examples
#
#   '!' =~ punc  #=> 0
#   '$' =~ punc  #=> nil
#
# Returns a Regexp.
def punc
  /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]/
end
#py ⇒ Object
10 11 12 13 14 |
# File 'lib/zhongwen_tools/regex.rb', line 10
# Public: A Regex assembled from the pyn_regexes patterns, with every plain
# vowel (a e i o u v) replaced via py_tones, followed by an optional
# whitespace or hyphen separator.
#
# NOTE(review): pyn_regexes and String#gsub_with_hash are defined outside
# this listing. gsub_with_hash presumably swaps each matched vowel for the
# py_tones entry with that key — verify against its definition.
#
# Returns a Regexp.
def py
  # FIXME: need to detect Ālābó
  # ([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
  toned_syllables = pyn_regexes.map do |_name, pattern|
    # [7..-2] drops the first seven characters and the last one of the
    # pattern's string form; presumably that is the "(?-mix:" ... ")"
    # wrapper Regexp#to_s adds to a flag-free Regexp — TODO confirm.
    pattern.to_s[7..-2].gsub_with_hash(/[aeiouv]/, py_tones)
  end

  /(#{toned_syllables.join('|')}([\s\-])?)/
end
#py_tones ⇒ Object
80 81 82 83 84 85 86 87 88 89 |
# File 'lib/zhongwen_tools/regex.rb', line 80
# Public: Lookup table from a plain pinyin vowel to a character-class string
# listing that vowel's tone-marked forms.
#
# Keys are the one-letter Strings 'a', 'e', 'i', 'o', 'u' and 'v'. The 'v'
# entry holds the ü forms, unmarked 'ü' included; every other entry ends
# with the plain vowel itself.
#
# Examples
#
#   py_tones['a']  #=> '[āáǎàa]'
#
# Returns a new Hash of String => String on every call.
def py_tones
  # The hash literal is the return value; no local is needed (the former
  # `py_tones = {...}` only shadowed this method's own name).
  {
    'a' => '[āáǎàa]',
    'e' => '[ēéěèe]',
    'i' => '[īíǐìi]',
    'o' => '[ōóǒòo]',
    'u' => '[ūúǔùu]',
    'v' => '[ǖǘǚǜü]'
  }
end
#pyn ⇒ Object
6 7 8 |
# File 'lib/zhongwen_tools/regex.rb', line 6
# Public: A Regex built from the pyn_regexes patterns. It has three capture
# groups:
#
#   1. one of the pyn_regexes alternatives, or a bare 'r'
#   2. an optional digit 1-5
#   3. an optional run of whitespace and/or hyphens
#
# NOTE(review): pyn_regexes is defined outside this listing; the digit is
# presumably a pinyin tone number — confirm.
#
# Returns a Regexp.
def pyn
  syllable_alternatives = pyn_regexes.values.join('|')
  /(#{syllable_alternatives}|r)([1-5])?([\s\-]+)?/
end
#zh ⇒ Object
28 29 30 |
# File 'lib/zhongwen_tools/regex.rb', line 28
# Public: A Regex for a single Chinese (Han) character.
#
# The alternation covers these code points:
#
#   U+2E80-U+2E99, U+2E9B-U+2EF3   radical supplement
#   U+2F00-U+2FD5                  Kangxi radicals
#   U+3005, U+3007                 々 and 〇
#   U+3021-U+3029, U+3038-U+303B   Hangzhou-style numerals and marks
#   U+3400-U+4DB5                  Extension A ideographs
#   U+4E00-U+9FCC                  unified ideographs
#   U+F900-U+FA6D, U+FA70-U+FAD9   compatibility ideographs
#
# Examples
#
#   '中' =~ zh  #=> 0
#   '|' =~ zh   #=> nil
#
# Returns a Regexp.
def zh
  # U+3005 and U+3007 are listed side by side: inside a character class a
  # '|' is a literal pipe, not alternation, so writing [\u3005|\u3007] made
  # this pattern match the ASCII '|' as if it were Chinese.
  /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/
end
#zh_numbers ⇒ Object
41 42 43 44 45 |
# File 'lib/zhongwen_tools/regex.rb', line 41
# Public: A Regex for a single Chinese numeral character.
#
# The character class lists the everyday, financial (大写) and variant
# forms for zero through ten, plus 廿 and the multipliers 百/佰, 千/仟,
# 万/萬 and 亿/億.
#
# Examples
#
#   '三' =~ zh_numbers  #=> 0
#   '3' =~ zh_numbers   #=> nil
#
# Returns a Regexp.
def zh_numbers
  # TODO: include numbers like yotta, etc.
  # 垓 秭 穰 溝 澗 正 載 --> beyond 100,000,000!
  /[〇零一壹幺二贰貳两兩三弎叁參四肆䦉五伍六陆陸七柒八捌九玖十拾廿百佰千仟万萬亿億]/
end
#zh_punc ⇒ Object
36 37 38 39 |
# File 'lib/zhongwen_tools/regex.rb', line 36
# Public: A Regex for a single punctuation character outside the
# ASCII/Latin-1 range.
#
# The alternation includes, among others, U+3001-U+3003 (、 。 〃), the
# bracket ranges U+3008-U+3011 and U+3014-U+301F, the vertical and small
# form ranges U+FE10-U+FE19 and U+FE30-U+FE6B, and the fullwidth marks
# U+FF01-U+FF65. ASCII punctuation is not matched.
#
# Examples
#
#   "\u3001" =~ zh_punc  #=> 0
#   ',' =~ zh_punc       #=> nil
#
# Returns a Regexp.
def zh_punc
  # TODO: includes non-zh punctuation codes. Should only include punctuation in CJK ranges.
  /[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
end