Module: ZhongwenTools::Regex

Defined in:
lib/zhongwen_tools/regex.rb

Class Method Summary

Class Method Details

.bopomofo ⇒ Object

Public: A Regex for bopomofo, a.k.a. Zhuyin Fuhao 注音符号.

Examples

bopomofo #=> <Regex>

Returns a Regex.



75
76
77
# File 'lib/zhongwen_tools/regex.rb', line 75

# Public: Pattern for a single Bopomofo (Zhuyin Fuhao) symbol.
#
# Returns a Regexp that matches one character of the Unicode Bopomofo script.
def self.bopomofo
  zhuyin_symbol = /\p{Bopomofo}/
  zhuyin_symbol
end

.capital_letters ⇒ Object



26
27
28
# File 'lib/zhongwen_tools/regex.rb', line 26

# Public: Pattern built from the keys of ZhongwenTools::Caps::CAPS.
# (Presumably the keys are the upper-case letters; CAPS is defined elsewhere.)
#
# Returns a Regexp with one capture group that matches any single CAPS key.
def self.capital_letters
  any_key = Regexp.union(ZhongwenTools::Caps::CAPS.keys)
  /(#{any_key})/
end

.fullwidth ⇒ Object



22
23
24
# File 'lib/zhongwen_tools/regex.rb', line 22

# Public: Pattern for a single fullwidth (zenkaku) alphanumeric or symbol.
#
# The class is written with \u escapes rather than literal characters so that
# tooling which normalises fullwidth forms to ASCII cannot corrupt it: with
# ASCII characters the "/" would terminate the regex literal and "#$" would
# start a global-variable interpolation, and the pattern would match
# halfwidth text instead of fullwidth text.
#
# Covered code points:
#   U+FF10-U+FF19  fullwidth digits 0-9
#   U+FF21-U+FF3A  fullwidth capital letters A-Z
#   U+FF41-U+FF5A  fullwidth small letters a-z
#   U+FF05 percent, U+FF0E full stop, U+FF1A colon, U+FF03 number sign,
#   U+FF04 dollar, U+FF06 ampersand, U+FF0B plus, U+FF0D hyphen-minus,
#   U+FF0F solidus, U+FF3C reverse solidus, U+FF1D equals, U+FF1B semicolon,
#   U+FF1C less-than, U+FF1E greater-than
#
# Examples
#
#   "\uFF21" =~ fullwidth #=> 0
#   "A" =~ fullwidth      #=> nil
#
# Returns a Regexp.
def self.fullwidth
  /[\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF05\uFF0E\uFF1A\uFF03\uFF04\uFF06\uFF0B\uFF0D\uFF0F\uFF3C\uFF1D\uFF1B\uFF1C\uFF1E]/
end

.lowercase_letters ⇒ Object



30
31
32
# File 'lib/zhongwen_tools/regex.rb', line 30

# Public: Pattern built from the values of ZhongwenTools::Caps::CAPS.
# (Presumably the values are the lower-case letters; CAPS is defined elsewhere.)
#
# Returns a Regexp with one capture group that matches any single CAPS value.
def self.lowercase_letters
  any_value = Regexp.union(ZhongwenTools::Caps::CAPS.values)
  /(#{any_value})/
end

.pinyin_num ⇒ Object



13
14
15
16
# File 'lib/zhongwen_tools/regex.rb', line 13

# Public: Pattern for one pinyin syllable written with a tone number (1-5).
#
# Capture groups, in order: the whole syllable, the optional initial
# consonant (plus optional "h"), the optional vowel cluster, an optional
# "n"/"g" ending, an optional "r", the tone digit, and optional trailing
# hyphens.
#
# Returns a Regexp.
def self.pinyin_num
  # FIXME: n?g? might need to be replaced with (ng|n)?
  numbered_syllable = /(([BPMFDTNLGKHZCSRJQXWYbpmfdtnlgkhzcsrjqxwy]?[h]?)(A[io]?|a[io]?|i[aeu]?o?|Ei?|ei?|Ou?|ou?|u[aoe]?i?|ve?)?(n?g?)(r?)([1-5])(\-+)?)/
  numbered_syllable
end

.pinyin_toneless ⇒ Object



18
19
20
# File 'lib/zhongwen_tools/regex.rb', line 18

# Public: Pattern for a pinyin syllable without any tone mark or number.
#
# The alternatives are the values of pyn_regexes (defined elsewhere) joined
# with "|", plus a bare "r". The second capture group takes any run of
# whitespace or hyphens that follows the syllable. The result is memoized
# in @pynt.
#
# Returns a Regexp.
def self.pinyin_toneless
  @pynt ||= begin
    syllables = pyn_regexes.values.join('|')
    /(#{syllables}|r)([\s\-]+)?/
  end
end

.punc ⇒ Object



38
39
40
# File 'lib/zhongwen_tools/regex.rb', line 38

# Public: Pattern for a single punctuation character of any script.
#
# Returns a Regexp based on the Punct character property.
def self.punc
  any_punctuation = /\p{Punct}/
  any_punctuation
end

.py ⇒ Object



8
9
10
11
# File 'lib/zhongwen_tools/regex.rb', line 8

# Public: Pattern for pinyin written with tone marks (accented vowels).
#
# Each value of pyn_regexes is converted with to_s and has its first seven
# characters and last character removed (presumably the "(?-mix:" ... ")"
# wrapper Regexp#to_s adds). Every a/e/i/o/u/v in what remains is then
# replaced through py_tones. Both helpers are defined elsewhere. The result
# is memoized in @py.
#
# NOTE(review): the trailing ([\s\-])? sits inside the alternation, so it
# binds only to the last alternative. Confirm whether that is intended.
#
# Returns a Regexp.
def self.py
  # ([ĀÁǍÀA][io]?|[io]?|[][āáǎàaēéěèeūúǔùu]?o?|[ĒÉĚÈE]i?|[]i?|[ŌÓǑÒO]u?|[]u?|u[āáǎàaēoēéěèe]?i?|[]e?)(n?g?r?)){1,}
  @py ||= begin
    toned = pyn_regexes.map do |_name, syllable|
      bare = syllable.to_s[7..-2]
      bare.gsub(/[aeiouv]/, py_tones)
    end
    /(#{toned.join('|')}([\s\-])?)/
  end
end

.pyn ⇒ Object



4
5
6
# File 'lib/zhongwen_tools/regex.rb', line 4

# Public: Pattern for a pinyin syllable followed by a tone number.
#
# The alternatives are the values of pyn_regexes (defined elsewhere) joined
# with "|", plus a bare "r". Group 1 is the syllable, group 2 the tone digit
# (1-5), and group 3 any trailing run of whitespace or hyphens. The result
# is memoized in @pyn.
#
# Returns a Regexp.
def self.pyn
  @pyn ||= begin
    syllables = pyn_regexes.values.join('|')
    /(#{syllables}|r)([1-5])([\s\-]+)?/
  end
end

.zh ⇒ Object



34
35
36
# File 'lib/zhongwen_tools/regex.rb', line 34

# Public: Pattern for a single Han (Chinese) character.
#
# Returns a Regexp based on the Unicode Han script property.
def self.zh
  han_character = /\p{Han}/
  han_character
end

.zh_number_multiple ⇒ Object



63
64
65
# File 'lib/zhongwen_tools/regex.rb', line 63

# Public: Pattern for a single Chinese numeral that acts as a multiplier
# (ten, hundred, thousand, ten-thousand, hundred-million), in both
# simplified and traditional forms.
#
# Returns a Regexp.
def self.zh_number_multiple
  multiplier = /[拾十百佰千仟万萬亿億]/
  multiplier
end

.zh_numbers ⇒ Object



47
48
49
50
51
52
# File 'lib/zhongwen_tools/regex.rb', line 47

# Public: Pattern for a single Chinese numeral character, covering both
# simplified and traditional forms as well as financial and colloquial
# variants.
#
# Returns a Regexp.
def self.zh_numbers
  # TODO: include numbers like yotta, etc.
  # 垓 秭 穰 溝 澗 正 載 --> beyond 100,000,000!
  # Regional: Dong Guai
  numeral = /[〇零一壹幺二贰貳两兩三弎叁參仨四肆䦉五伍六陆陸七柒八捌九玖十拾廿卅百佰千仟万萬亿億]/
  numeral
end

.zh_punc ⇒ Object



42
43
44
45
# File 'lib/zhongwen_tools/regex.rb', line 42

# Public: Pattern for a single punctuation character drawn from an explicit
# list of code-point ranges (supplemental punctuation, CJK symbols and
# punctuation, vertical and small forms, fullwidth forms, and others).
#
# Returns a Regexp.
def self.zh_punc
  # TODO: includes non-zh punctuation codes. Should only include punctuation in CJK ranges.
  punctuation = /[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
  punctuation
end

.zhs_numbers ⇒ Object



54
55
56
57
# File 'lib/zhongwen_tools/regex.rb', line 54

# Public: Pattern for a single numeral character as written in Simplified
# Chinese.
#
# Returns a Regexp.
def self.zhs_numbers
  # TODO: check if 佰,仟 are the financial numbers in zhs
  simplified_numeral = /[〇零一壹幺二贰两三弎叁仨四肆䦉五伍六陆七柒八捌九玖十拾廿卅百佰千仟万亿]/
  simplified_numeral
end

.zht_numbers ⇒ Object



59
60
61
# File 'lib/zhongwen_tools/regex.rb', line 59

# Public: Pattern for a single numeral character as written in Traditional
# Chinese.
#
# The class includes 百 (hundred): the character is the same in both scripts,
# so it belongs here alongside its financial form 佰.
#
# Examples
#
#   "百" =~ zht_numbers #=> 0
#   "万" =~ zht_numbers #=> nil (simplified-only form)
#
# Returns a Regexp.
def self.zht_numbers
  /[〇零一壹幺二貳兩三弎參仨四肆䦉五伍六陸七柒八捌九玖十拾廿卅百佰千仟萬億]/
end