Module: ZhongwenTools::String
- Included in:
- Basement
- Defined in:
- lib/zhongwen_tools/string.rb,
lib/zhongwen_tools/string/ruby18.rb
Defined Under Namespace
Classes: Basement
Constant Summary
# Patterns used by the Chinese-detection helpers.
#   :zh   - single characters in the listed CJK radical, ideograph and
#           compatibility ranges, plus U+3005 and U+3007.
#   :punc - single punctuation characters from the listed ranges.
# The U+3005/U+3007 class previously contained a literal "|", which made the
# ASCII pipe match as a :zh character; the class now holds only those two
# code points.
UNICODE_REGEX = {
  :zh => /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/,
  :punc => /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]|[\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F]|[\u066A-\u066D]|[\u06D4]|[\u0700-\u070D]|[\u07F7-\u07F9]|[\u0830-\u083E]|[\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B]|[\u0F04-\u0F12]|[\u0F14]|[\u0F3A-\u0F3D]|[\u0F85]|[\u0FD0-\u0FD4]|[\u0FD9\u0FDA]|[\u104A-\u104F]|[\u10FB]|[\u1360-\u1368]|[\u1400\u166D\u166E\u169B\u169C]|[\u16EB-\u16ED]|[\u1735\u1736]|[\u17D4-\u17D6]|[\u17D8-\u17DA]|[\u1800-\u180A\u1944\u1945\u1A1E\u1A1F]|[\u1AA0-\u1AA6]|[\u1AA8-\u1AAD]|[\u1B5A-\u1B60]|[\u1BFC-\u1BFF]|[\u1C3B-\u1C3F]|[\u1C7E\u1C7F]|[\u1CC0-\u1CC7]|[\u1CD3]|[\u2010-\u2027]|[\u2030-\u2043]|[\u2045-\u2051]|[\u2053-\u205E]|[\u207D\u207E\u208D\u208E\u2329\u232A]|[\u2768-\u2775\u27C5\u27C6]|[\u27E6-\u27EF]|[\u2983-\u2998]|[\u29D8-\u29DB\u29FC\u29FD]|[\u2CF9-\u2CFC]|[\u2CFE\u2CFF\u2D70]|[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
}
Class Method Summary
- .ascii?(*args) ⇒ Boolean
- .chars(*args) ⇒ Object
- .from_codepoint(*args) ⇒ Object
- .fullwidth?(*args) ⇒ Boolean
- .halfwidth?(*args) ⇒ Boolean
- .has_zh?(*args) ⇒ Boolean
- .has_zh_punctuation?(*args) ⇒ Boolean
- .multibyte?(*args) ⇒ Boolean
- .reverse(*args) ⇒ Object
- .size(*args) ⇒ Object
- .to_codepoint(*args) ⇒ Object
- .to_halfwidth(*args) ⇒ Object
- .to_utf8(*args) ⇒ Object
- .uri_encode(*args) ⇒ Object
- .uri_escape(*args) ⇒ Object
- .zh?(*args) ⇒ Boolean
Instance Method Summary
- #ascii?(str = nil) ⇒ Boolean
- #chars(str = nil) ⇒ Object
- #convert_regex(regex) ⇒ Object
- #from_codepoint(str = nil) ⇒ Object
- #fullwidth?(str = nil) ⇒ Boolean
- #halfwidth?(str = nil) ⇒ Boolean
- #has_zh?(str = nil) ⇒ Boolean
- #has_zh_punctuation?(str = nil) ⇒ Boolean
- #multibyte?(str = nil) ⇒ Boolean
- #reverse(str = nil) ⇒ Object
- #size(str = nil) ⇒ Object
- #to_codepoint(str = nil) ⇒ Object
- #to_halfwidth(str = nil) ⇒ Object
- #to_utf8(encoding = nil, encodings = nil) ⇒ Object
- #uri_encode(str = nil) ⇒ Object
- #uri_escape(str = nil) ⇒ Object
- #zh?(str = nil) ⇒ Boolean
Class Method Details
.ascii?(*args) ⇒ Boolean
127 128 129 |
# File 'lib/zhongwen_tools/string.rb', line 127
# Module-level form of #ascii?.
# Builds a new Basement and hands every argument to its #ascii?.
#
# @param args arguments passed through unchanged
# @return [Boolean] whatever Basement#ascii? returns
def self.ascii?(*args)
  delegate = Basement.new
  delegate.ascii?(*args)
end
.chars(*args) ⇒ Object
109 110 111 |
# File 'lib/zhongwen_tools/string.rb', line 109
# Module-level form of #chars.
# Builds a new Basement and hands every argument to its #chars.
#
# @param args arguments passed through unchanged
# @return [Object] whatever Basement#chars returns
def self.chars(*args)
  delegate = Basement.new
  delegate.chars(*args)
end
.from_codepoint(*args) ⇒ Object
154 155 156 |
# File 'lib/zhongwen_tools/string.rb', line 154
# Module-level form of #from_codepoint.
# Builds a new Basement and hands every argument to its #from_codepoint.
#
# @param args arguments passed through unchanged
# @return [Object] whatever Basement#from_codepoint returns
def self.from_codepoint(*args)
  delegate = Basement.new
  delegate.from_codepoint(*args)
end
.fullwidth?(*args) ⇒ Boolean
136 137 138 |
# File 'lib/zhongwen_tools/string.rb', line 136
# Module-level form of #fullwidth?.
# Builds a new Basement and hands every argument to its #fullwidth?.
#
# @param args arguments passed through unchanged
# @return [Boolean] whatever Basement#fullwidth? returns
def self.fullwidth?(*args)
  delegate = Basement.new
  delegate.fullwidth?(*args)
end
.halfwidth?(*args) ⇒ Boolean
133 134 135 |
# File 'lib/zhongwen_tools/string.rb', line 133
# Module-level form of #halfwidth?.
# Builds a new Basement and hands every argument to its #halfwidth?.
#
# @param args arguments passed through unchanged
# @return [Boolean] whatever Basement#halfwidth? returns
def self.halfwidth?(*args)
  delegate = Basement.new
  delegate.halfwidth?(*args)
end
.has_zh?(*args) ⇒ Boolean
142 143 144 |
# File 'lib/zhongwen_tools/string.rb', line 142
# Module-level form of #has_zh?.
# Builds a new Basement and hands every argument to its #has_zh?.
#
# @param args arguments passed through unchanged
# @return [Boolean] whatever Basement#has_zh? returns
def self.has_zh?(*args)
  delegate = Basement.new
  delegate.has_zh?(*args)
end
.has_zh_punctuation?(*args) ⇒ Boolean
145 146 147 |
# File 'lib/zhongwen_tools/string.rb', line 145
# Module-level form of #has_zh_punctuation?.
# Builds a new Basement and hands every argument to its #has_zh_punctuation?.
#
# @param args arguments passed through unchanged
# @return [Boolean] whatever Basement#has_zh_punctuation? returns
def self.has_zh_punctuation?(*args)
  delegate = Basement.new
  delegate.has_zh_punctuation?(*args)
end
.multibyte?(*args) ⇒ Boolean
130 131 132 |
# File 'lib/zhongwen_tools/string.rb', line 130
# Module-level form of #multibyte?.
# Builds a new Basement and hands every argument to its #multibyte?.
#
# @param args arguments passed through unchanged
# @return [Boolean] whatever Basement#multibyte? returns
def self.multibyte?(*args)
  delegate = Basement.new
  delegate.multibyte?(*args)
end
.reverse(*args) ⇒ Object
115 116 117 |
# File 'lib/zhongwen_tools/string.rb', line 115
# Module-level form of #reverse.
# Builds a new Basement and hands every argument to its #reverse.
#
# @param args arguments passed through unchanged
# @return [Object] whatever Basement#reverse returns
def self.reverse(*args)
  delegate = Basement.new
  delegate.reverse(*args)
end
.size(*args) ⇒ Object
112 113 114 |
# File 'lib/zhongwen_tools/string.rb', line 112
# Module-level form of #size.
# Builds a new Basement and hands every argument to its #size.
#
# @param args arguments passed through unchanged
# @return [Object] whatever Basement#size returns
def self.size(*args)
  delegate = Basement.new
  delegate.size(*args)
end
.to_codepoint(*args) ⇒ Object
151 152 153 |
# File 'lib/zhongwen_tools/string.rb', line 151
# Module-level form of #to_codepoint.
# Builds a new Basement and hands every argument to its #to_codepoint.
#
# @param args arguments passed through unchanged
# @return [Object] whatever Basement#to_codepoint returns
def self.to_codepoint(*args)
  delegate = Basement.new
  delegate.to_codepoint(*args)
end
.to_halfwidth(*args) ⇒ Object
139 140 141 |
# File 'lib/zhongwen_tools/string.rb', line 139
# Module-level form of #to_halfwidth.
# Builds a new Basement and hands every argument to its #to_halfwidth.
#
# @param args arguments passed through unchanged
# @return [Object] whatever Basement#to_halfwidth returns
def self.to_halfwidth(*args)
  delegate = Basement.new
  delegate.to_halfwidth(*args)
end
.to_utf8(*args) ⇒ Object
118 119 120 |
# File 'lib/zhongwen_tools/string.rb', line 118
# Module-level form of #to_utf8.
# Builds a new Basement and hands every argument to its #to_utf8.
#
# @param args arguments passed through unchanged
# @return [Object] whatever Basement#to_utf8 returns
def self.to_utf8(*args)
  delegate = Basement.new
  delegate.to_utf8(*args)
end
.uri_encode(*args) ⇒ Object
121 122 123 |
# File 'lib/zhongwen_tools/string.rb', line 121
# Module-level form of #uri_encode.
# Builds a new Basement and hands every argument to its #uri_encode.
#
# @param args arguments passed through unchanged
# @return [Object] whatever Basement#uri_encode returns
def self.uri_encode(*args)
  delegate = Basement.new
  delegate.uri_encode(*args)
end
.uri_escape(*args) ⇒ Object
124 125 126 |
# File 'lib/zhongwen_tools/string.rb', line 124
# Module-level form of #uri_escape.
# Builds a new Basement and hands every argument to its #uri_escape.
#
# @param args arguments passed through unchanged
# @return [Object] whatever Basement#uri_escape returns
def self.uri_escape(*args)
  delegate = Basement.new
  delegate.uri_escape(*args)
end
.zh?(*args) ⇒ Boolean
148 149 150 |
# File 'lib/zhongwen_tools/string.rb', line 148
# Module-level form of #zh?.
# Builds a new Basement and hands every argument to its #zh?.
#
# @param args arguments passed through unchanged
# @return [Boolean] whatever Basement#zh? returns
def self.zh?(*args)
  delegate = Basement.new
  delegate.zh?(*args)
end
Instance Method Details
#ascii?(str = nil) ⇒ Boolean
61 62 63 64 |
# File 'lib/zhongwen_tools/string.rb', line 61
# Reports whether the string has exactly as many characters as bytes.
#
# @param str [String, nil] string to inspect; the receiver is used when omitted
# @return [Boolean] true when the character count equals the byte count
def ascii?(str = nil)
  subject = str || self
  char_count = subject.chars.size
  byte_count = subject.bytes.to_a.size
  char_count == byte_count
end
#chars(str = nil) ⇒ Object
41 42 43 |
# File 'lib/zhongwen_tools/string.rb', line 41
# Splits the string into an array of single characters by scanning with a
# multiline, UTF-8 "any character" pattern (so newlines are included).
#
# @param str [String, nil] string to split; the receiver is used when omitted
# @return [Array<String>] one element per matched character
def chars(str = nil)
  subject = str || self
  pieces = subject.scan(/./mu)
  pieces.to_a
end
#convert_regex(regex) ⇒ Object
33 34 35 36 37 |
# File 'lib/zhongwen_tools/string/ruby18.rb', line 33
# Rebuilds a regex with its "\uXXXX" escapes (four uppercase hex digits)
# replaced by the result of calling #from_codepoint on each "uXXXX" token.
#
# @param regex [Regexp] pattern whose string form is rewritten
# @return [Regexp] a new regex compiled from the rewritten string form
def convert_regex(regex)
  source = regex.to_s
  tokens = source.scan(/u[0-9A-Z]{4}/)
  # Each token replaces one backslash-prefixed occurrence, in scan order.
  rewritten = tokens.inject(source) do |text, cp|
    text.sub('\\' + cp, cp.from_codepoint)
  end
  /#{rewritten}/
end
#from_codepoint(str = nil) ⇒ Object
100 101 102 103 104 |
# File 'lib/zhongwen_tools/string.rb', line 100
# Converts codepoint notation ("\\u4e2d", "u4e2d" or bare hex "4e2d") back
# into the characters it names.
#
# A string holding several escapes, such as the output of #to_codepoint for a
# multi-character string, is now decoded in full; previously everything after
# the first codepoint was silently dropped.
#
# @param str [String, nil] codepoint text; the receiver is used when omitted
# @return [String] the decoded character(s), packed as UTF-8
def from_codepoint(str = nil)
  str ||= self
  codes = str.scan(/\\?u([0-9A-Fa-f]+)/).flatten
  # Zero or one escape: keep the original single-value conversion, which also
  # accepts bare hex digits with no "u" prefix.
  return [str.sub(/\\?u/, '').hex].pack('U') if codes.size < 2

  codes.map { |code| code.hex }.pack('U*')
end
#fullwidth?(str = nil) ⇒ Boolean
75 76 77 78 |
# File 'lib/zhongwen_tools/string.rb', line 75
# Reports whether the string is fullwidth: #halfwidth? must be false for it
# and #to_halfwidth must produce something different from the input.
#
# @param str [String, nil] string to inspect; the receiver is used when omitted
# @return [Boolean]
def fullwidth?(str = nil)
  subject = str || self
  return false if halfwidth?(subject)

  to_halfwidth(subject) != subject
end
#halfwidth?(str = nil) ⇒ Boolean
70 71 72 73 |
# File 'lib/zhongwen_tools/string.rb', line 70
# Reports whether the string is free of fullwidth digits, Latin letters and
# the listed fullwidth symbols.
#
# The character class is written with the fullwidth forms themselves. The
# ASCII look-alikes shown previously were a transliteration artifact: they
# matched ordinary ASCII text and did not form a valid regex literal.
#
# @param str [String, nil] string to inspect; the receiver is used when omitted
# @return [Boolean] true when no fullwidth character from the class is found
def halfwidth?(str = nil)
  str ||= self
  str[/[０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞]/].nil?
end
#has_zh?(str = nil) ⇒ Boolean
18 19 20 21 22 |
# File 'lib/zhongwen_tools/string.rb', line 18
# Reports whether the string contains at least one character matched by
# UNICODE_REGEX[:zh] or by UNICODE_REGEX[:punc].
#
# @param str [String, nil] string to inspect; the receiver is used when omitted
# @return [Boolean] true when either pattern matches somewhere in the string
def has_zh?(str = nil)
  subject = str || self
  hit = subject[/(#{UNICODE_REGEX[:zh]}|#{UNICODE_REGEX[:punc]})/]
  !hit.nil?
end
#has_zh_punctuation?(str = nil) ⇒ Boolean
30 31 32 33 34 |
# File 'lib/zhongwen_tools/string.rb', line 30
# Reports whether the string contains at least one character matched by
# UNICODE_REGEX[:punc].
#
# @param str [String, nil] string to inspect; the receiver is used when omitted
# @return [Boolean] true when the punctuation pattern matches
def has_zh_punctuation?(str = nil)
  subject = str || self
  hit = subject[UNICODE_REGEX[:punc]]
  !hit.nil?
end
#multibyte?(str = nil) ⇒ Boolean
66 67 68 |
# File 'lib/zhongwen_tools/string.rb', line 66
# Negation of #ascii? as called on the string itself.
#
# @param str [String, nil] string to inspect; the receiver is used when omitted
# @return [Boolean] true when the string's #ascii? is falsy
def multibyte?(str = nil)
  subject = str || self
  subject.ascii? ? false : true
end
#reverse(str = nil) ⇒ Object
45 46 47 48 |
# File 'lib/zhongwen_tools/string.rb', line 45
# Reverses the string character by character: splits it with #chars, reverses
# the pieces and joins them again.
#
# @param str [String, nil] string to reverse; the receiver is used when omitted
# @return [String] the characters in reverse order
def reverse(str = nil)
  subject = str || self
  flipped = subject.chars.reverse
  flipped.join
end
#size(str = nil) ⇒ Object
36 37 38 39 |
# File 'lib/zhongwen_tools/string.rb', line 36
# Counts the characters of the string, as produced by #chars.
#
# @param str [String, nil] string to measure; the receiver is used when omitted
# @return [Integer] number of characters
def size(str = nil)
  subject = str || self
  pieces = subject.chars
  pieces.size
end
#to_codepoint(str = nil) ⇒ Object
92 93 94 95 96 97 98 |
# File 'lib/zhongwen_tools/string.rb', line 92
# Renders every character as a backslash-u escape with at least four
# lowercase hex digits and concatenates the results.
#
# @param str [String, nil] string to convert; the receiver is used when omitted
# @return [String] the joined escapes, e.g. "\\u4e2d\\u6587"
def to_codepoint(str = nil)
  subject = str || self
  escapes = subject.chars.map do |char|
    format("\\u%04x", char.unpack("U").first)
  end
  escapes.join
end
#to_halfwidth(str = nil) ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/zhongwen_tools/string.rb', line 80
# Replaces each fullwidth digit, Latin letter or listed fullwidth symbol with
# the value stored for it in FW_HW.
#
# The character class is written with the fullwidth forms themselves. The
# ASCII look-alikes shown previously were a transliteration artifact and did
# not form a valid regex literal.
#
# @param str [String, nil] string to convert; the receiver is used when omitted
# @return [String] a converted copy (gsub returns a new string each pass), or
#   the input itself when nothing matched
def to_halfwidth(str = nil)
  str ||= self
  fullwidth = str.scan(/([０-９Ａ-Ｚａ-ｚ％．：＃＄＆＋－／＼＝；＜＞])/u).uniq.flatten
  fullwidth.each do |char|
    str = str.gsub(char, FW_HW[char])
  end
  str
end
#to_utf8(encoding = nil, encodings = nil) ⇒ Object
13 14 15 16 |
# File 'lib/zhongwen_tools/string.rb', line 13
# Calls force_encoding('utf-8') on the string and returns the result.
#
# @param str [String, nil] string to relabel; the receiver is used when omitted
# @return [String] the value returned by force_encoding
def to_utf8(str = nil)
  subject = str || self
  # TODO: better conversion functions available in categorize
  subject.force_encoding('utf-8')
end
#uri_encode(str = nil) ⇒ Object
50 51 52 53 |
# File 'lib/zhongwen_tools/string.rb', line 50
# Percent-encodes the characters of the string that are unsafe in a URI.
#
# URI.encode was removed in Ruby 3.0. It is still used where it exists;
# otherwise the default parser's #escape is called with its default unsafe set.
#
# @param str [String, nil] string to encode; the receiver is used when omitted
# @return [String] the percent-encoded string
def uri_encode(str = nil)
  str ||= self
  return URI.encode(str) if URI.respond_to?(:encode)

  URI::DEFAULT_PARSER.escape(str)
end
#uri_escape(str = nil) ⇒ Object
55 56 57 58 59 |
# File 'lib/zhongwen_tools/string.rb', line 55
# Percent-encodes every character that is not in the URI "unreserved" set.
#
# URI.escape was removed in Ruby 3.0 and URI::PATTERN is not defined on every
# uri version. The pattern module and the escaping entry point are therefore
# looked up defensively, with the original calls used wherever they exist.
#
# @param str [String, nil] string to escape; the receiver is used when omitted
# @return [String] the percent-encoded string
def uri_escape(str = nil)
  str ||= self
  patterns = defined?(URI::RFC2396_REGEXP) ? URI::RFC2396_REGEXP::PATTERN : URI::PATTERN
  unsafe = Regexp.new("[^#{patterns::UNRESERVED}]")
  return URI.escape(str, unsafe) if URI.respond_to?(:escape)

  URI::DEFAULT_PARSER.escape(str, unsafe)
end
#zh?(str = nil) ⇒ Boolean
24 25 26 27 28 |
# File 'lib/zhongwen_tools/string.rb', line 24
# Reports whether the whole string consists of runs matched by
# UNICODE_REGEX[:zh], runs matched by UNICODE_REGEX[:punc], and whitespace:
# the matched runs are joined and compared with the input.
#
# @param str [String, nil] string to inspect; the receiver is used when omitted
# @return [Boolean] true when the joined runs equal the string
def zh?(str = nil)
  subject = str || self
  runs = subject.scan(/(#{UNICODE_REGEX[:zh]}+|#{UNICODE_REGEX[:punc]}+|\s+)/)
  runs.join == subject
end