Module: ZhongwenTools::String

Included in:
Basement
Defined in:
lib/zhongwen_tools/string.rb,
lib/zhongwen_tools/string/ruby18.rb

Defined Under Namespace

Classes: Basement

Constant Summary collapse

# Regular expressions used by the detection helpers in this module.
#   :zh   - code point ranges treated as Chinese characters
#   :punc - code point ranges treated as punctuation (ASCII punctuation
#           ranges such as \u0021-\u0023 are part of this pattern)
# Each pattern is an alternation of character classes, so it matches
# exactly one character at a time.
UNICODE_REGEX =
{
  # \u3005 and \u3007 are listed as two individual members; a "|" inside a
  # character class is a literal pipe, not an alternation, so none is used.
  :zh => /[\u2E80-\u2E99]|[\u2E9B-\u2EF3]|[\u2F00-\u2FD5]|[\u3005\u3007]|[\u3021-\u3029]|[\u3038-\u303B]|[\u3400-\u4DB5]|[\u4E00-\u9FCC]|[\uF900-\uFA6D]|[\uFA70-\uFAD9]/,
  :punc => /[\u0021-\u0023]|[\u0025-\u002A]|[\u002C-\u002F]|[\u003A\u003B\u003F\u0040]|[\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387]|[\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F]|[\u066A-\u066D]|[\u06D4]|[\u0700-\u070D]|[\u07F7-\u07F9]|[\u0830-\u083E]|[\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B]|[\u0F04-\u0F12]|[\u0F14]|[\u0F3A-\u0F3D]|[\u0F85]|[\u0FD0-\u0FD4]|[\u0FD9\u0FDA]|[\u104A-\u104F]|[\u10FB]|[\u1360-\u1368]|[\u1400\u166D\u166E\u169B\u169C]|[\u16EB-\u16ED]|[\u1735\u1736]|[\u17D4-\u17D6]|[\u17D8-\u17DA]|[\u1800-\u180A\u1944\u1945\u1A1E\u1A1F]|[\u1AA0-\u1AA6]|[\u1AA8-\u1AAD]|[\u1B5A-\u1B60]|[\u1BFC-\u1BFF]|[\u1C3B-\u1C3F]|[\u1C7E\u1C7F]|[\u1CC0-\u1CC7]|[\u1CD3]|[\u2010-\u2027]|[\u2030-\u2043]|[\u2045-\u2051]|[\u2053-\u205E]|[\u207D\u207E\u208D\u208E\u2329\u232A]|[\u2768-\u2775\u27C5\u27C6]|[\u27E6-\u27EF]|[\u2983-\u2998]|[\u29D8-\u29DB\u29FC\u29FD]|[\u2CF9-\u2CFC]|[\u2CFE\u2CFF\u2D70]|[\u2E00-\u2E2E]|[\u2E30-\u2E3B]|[\u3001-\u3003]|[\u3008-\u3011]|[\u3014-\u301F]|[\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF]|[\uA60D-\uA60F]|[\uA673\uA67E]|[\uA6F2-\uA6F7]|[\uA874-\uA877]|[\uA8CE\uA8CF]|[\uA8F8-\uA8FA]|[\uA92E\uA92F\uA95F]|[\uA9C1-\uA9CD]|[\uA9DE\uA9DF]|[\uAA5C-\uAA5F]|[\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F]|[\uFE10-\uFE19]|[\uFE30-\uFE52]|[\uFE54-\uFE61]|[\uFE63\uFE68\uFE6A\uFE6B]|[\uFF01-\uFF03]|[\uFF05-\uFF0A]|[\uFF0C-\uFF0F]|[\uFF1A\uFF1B\uFF1F\uFF20]|[\uFF3B-\uFF3D]|[\uFF3F\uFF5B\uFF5D]|[\uFF5F-\uFF65]/
}

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.ascii?(*args) ⇒ Boolean

Returns:

  • (Boolean)


127
128
129
# File 'lib/zhongwen_tools/string.rb', line 127

# Module-level form of #ascii?: builds a new Basement and forwards every
# argument to its #ascii? method, returning whatever that call returns.
def self.ascii?(*args)
  basement = Basement.new
  basement.ascii?(*args)
end

.chars(*args) ⇒ Object



109
110
111
# File 'lib/zhongwen_tools/string.rb', line 109

# Module-level form of #chars: builds a new Basement and forwards every
# argument to its #chars method, returning whatever that call returns.
def self.chars(*args)
  basement = Basement.new
  basement.chars(*args)
end

.from_codepoint(*args) ⇒ Object



154
155
156
# File 'lib/zhongwen_tools/string.rb', line 154

# Module-level form of #from_codepoint: builds a new Basement and forwards
# every argument to its #from_codepoint method.
def self.from_codepoint(*args)
  basement = Basement.new
  basement.from_codepoint(*args)
end

.fullwidth?(*args) ⇒ Boolean

Returns:

  • (Boolean)


136
137
138
# File 'lib/zhongwen_tools/string.rb', line 136

# Module-level form of #fullwidth?: builds a new Basement and forwards
# every argument to its #fullwidth? method.
def self.fullwidth?(*args)
  basement = Basement.new
  basement.fullwidth?(*args)
end

.halfwidth?(*args) ⇒ Boolean

Returns:

  • (Boolean)


133
134
135
# File 'lib/zhongwen_tools/string.rb', line 133

# Module-level form of #halfwidth?: builds a new Basement and forwards
# every argument to its #halfwidth? method.
def self.halfwidth?(*args)
  basement = Basement.new
  basement.halfwidth?(*args)
end

.has_zh?(*args) ⇒ Boolean

Returns:

  • (Boolean)


142
143
144
# File 'lib/zhongwen_tools/string.rb', line 142

# Module-level form of #has_zh?: builds a new Basement and forwards every
# argument to its #has_zh? method.
def self.has_zh?(*args)
  basement = Basement.new
  basement.has_zh?(*args)
end

.has_zh_punctuation?(*args) ⇒ Boolean

Returns:

  • (Boolean)


145
146
147
# File 'lib/zhongwen_tools/string.rb', line 145

# Module-level form of #has_zh_punctuation?: builds a new Basement and
# forwards every argument to its #has_zh_punctuation? method.
def self.has_zh_punctuation?(*args)
  basement = Basement.new
  basement.has_zh_punctuation?(*args)
end

.multibyte?(*args) ⇒ Boolean

Returns:

  • (Boolean)


130
131
132
# File 'lib/zhongwen_tools/string.rb', line 130

# Module-level form of #multibyte?: builds a new Basement and forwards
# every argument to its #multibyte? method.
def self.multibyte?(*args)
  basement = Basement.new
  basement.multibyte?(*args)
end

.reverse(*args) ⇒ Object



115
116
117
# File 'lib/zhongwen_tools/string.rb', line 115

# Module-level form of #reverse: builds a new Basement and forwards every
# argument to its #reverse method.
def self.reverse(*args)
  basement = Basement.new
  basement.reverse(*args)
end

.size(*args) ⇒ Object



112
113
114
# File 'lib/zhongwen_tools/string.rb', line 112

# Module-level form of #size: builds a new Basement and forwards every
# argument to its #size method.
def self.size(*args)
  basement = Basement.new
  basement.size(*args)
end

.to_codepoint(*args) ⇒ Object



151
152
153
# File 'lib/zhongwen_tools/string.rb', line 151

# Module-level form of #to_codepoint: builds a new Basement and forwards
# every argument to its #to_codepoint method.
def self.to_codepoint(*args)
  basement = Basement.new
  basement.to_codepoint(*args)
end

.to_halfwidth(*args) ⇒ Object



139
140
141
# File 'lib/zhongwen_tools/string.rb', line 139

# Module-level form of #to_halfwidth: builds a new Basement and forwards
# every argument to its #to_halfwidth method.
def self.to_halfwidth(*args)
  basement = Basement.new
  basement.to_halfwidth(*args)
end

.to_utf8(*args) ⇒ Object



118
119
120
# File 'lib/zhongwen_tools/string.rb', line 118

# Module-level form of #to_utf8: builds a new Basement and forwards every
# argument to its #to_utf8 method.
def self.to_utf8(*args)
  basement = Basement.new
  basement.to_utf8(*args)
end

.uri_encode(*args) ⇒ Object



121
122
123
# File 'lib/zhongwen_tools/string.rb', line 121

# Module-level form of #uri_encode: builds a new Basement and forwards
# every argument to its #uri_encode method.
def self.uri_encode(*args)
  basement = Basement.new
  basement.uri_encode(*args)
end

.uri_escape(*args) ⇒ Object



124
125
126
# File 'lib/zhongwen_tools/string.rb', line 124

# Module-level form of #uri_escape: builds a new Basement and forwards
# every argument to its #uri_escape method.
def self.uri_escape(*args)
  basement = Basement.new
  basement.uri_escape(*args)
end

.zh?(*args) ⇒ Boolean

Returns:

  • (Boolean)


148
149
150
# File 'lib/zhongwen_tools/string.rb', line 148

# Module-level form of #zh?: builds a new Basement and forwards every
# argument to its #zh? method.
def self.zh?(*args)
  basement = Basement.new
  basement.zh?(*args)
end

Instance Method Details

#ascii?(str = nil) ⇒ Boolean

Returns:

  • (Boolean)


61
62
63
64
# File 'lib/zhongwen_tools/string.rb', line 61

# Reports whether the text has as many characters as bytes, i.e. every
# character occupies a single byte.
#
# str - the text to inspect; the receiver is used when omitted.
#
# Returns true or false.
def ascii?(str = nil)
  text = str || self
  char_count = text.chars.size
  byte_count = text.bytes.to_a.size
  char_count == byte_count
end

#chars(str = nil) ⇒ Object



41
42
43
# File 'lib/zhongwen_tools/string.rb', line 41

# Splits the text into an array of single characters. The pattern uses the
# multiline flag, so newlines are returned as characters too.
#
# str - the text to split; the receiver is used when omitted.
#
# Returns an Array of one-character strings.
def chars(str = nil)
  text = str || self
  pieces = text.scan(/./mu)
  pieces.to_a
end

#convert_regex(regex) ⇒ Object



33
34
35
36
37
# File 'lib/zhongwen_tools/string/ruby18.rb', line 33

# Rebuilds a regular expression with every textual "\uXXXX" escape in its
# source replaced by the character that escape names.
#
# regex - the Regexp whose string form (Regexp#to_s) is rewritten.
#
# Returns a new Regexp built from the rewritten string.
#
# Each escape is matched as a backslash, "u" and exactly four hex digits in
# either case. The replacement character is inserted unescaped, so an escape
# naming a regex metacharacter becomes that metacharacter in the result.
def convert_regex(regex)
  expanded = regex.to_s.gsub(/\\u[0-9A-Fa-f]{4}/) do |escape|
    # Drop the leading "\u" and turn the hex digits into a UTF-8 character.
    [escape[2..-1].hex].pack('U')
  end
  /#{expanded}/
end

#from_codepoint(str = nil) ⇒ Object



100
101
102
103
104
# File 'lib/zhongwen_tools/string.rb', line 100

# Converts a textual code point such as "\u4e2d", "u4e2d" or "4e2d" into
# the character it names.
#
# str - the code point text; the receiver is used when omitted.
#
# Only the first "\u" / "u" marker is stripped before the remainder is read
# as hexadecimal.
#
# Returns a one-character String.
def from_codepoint(str = nil)
  source = str || self
  hex_digits = source.sub(/\\?u/,'')
  [hex_digits.hex].pack("U")
end

#fullwidth?(str = nil) ⇒ Boolean

Returns:

  • (Boolean)


75
76
77
78
# File 'lib/zhongwen_tools/string.rb', line 75

# Reports whether the text is fullwidth: #halfwidth? must reject it and
# #to_halfwidth must produce something different from the input.
#
# str - the text to inspect; the receiver is used when omitted.
#
# Returns true or false.
def fullwidth?(str = nil)
  str ||= self
  return false if self.halfwidth?(str)

  self.to_halfwidth(str) != str
end

#halfwidth?(str = nil) ⇒ Boolean

Returns:

  • (Boolean)


70
71
72
73
# File 'lib/zhongwen_tools/string.rb', line 70

# Reports whether the text is free of fullwidth characters.
#
# str - the text to inspect; the receiver is used when omitted.
#
# Returns true when str contains none of the fullwidth digits (U+FF10-FF19),
# fullwidth Latin letters (U+FF21-FF3A, U+FF41-FF5A) or the fullwidth forms
# of % . : # $ & + - / \ = ; < >, and false otherwise.
def halfwidth?(str = nil)
  str ||= self
  # Written as \u escapes so the class cannot be folded back to ASCII by
  # tools that normalize fullwidth characters.
  fullwidth_chars = /[\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF05\uFF0E\uFF1A\uFF03\uFF04\uFF06\uFF0B\uFF0D\uFF0F\uFF3C\uFF1D\uFF1B\uFF1C\uFF1E]/
  str[fullwidth_chars].nil?
end

#has_zh?(str = nil) ⇒ Boolean

Returns:

  • (Boolean)


18
19
20
21
22
# File 'lib/zhongwen_tools/string.rb', line 18

# Reports whether the text contains at least one character matched by
# UNICODE_REGEX[:zh] or UNICODE_REGEX[:punc].
#
# str - the text to inspect; the receiver is used when omitted.
#
# Returns true or false.
def has_zh?(str = nil)
  text = str || self
  zh_or_punc = /(#{UNICODE_REGEX[:zh]}|#{UNICODE_REGEX[:punc]})/
  !text[zh_or_punc].nil?
end

#has_zh_punctuation?(str = nil) ⇒ Boolean

Returns:

  • (Boolean)


30
31
32
33
34
# File 'lib/zhongwen_tools/string.rb', line 30

# Reports whether the text contains at least one character matched by
# UNICODE_REGEX[:punc].
#
# str - the text to inspect; the receiver is used when omitted.
#
# Returns true or false.
def has_zh_punctuation?(str = nil)
  text = str || self
  punctuation_hit = text[UNICODE_REGEX[:punc]]
  !punctuation_hit.nil?
end

#multibyte?(str = nil) ⇒ Boolean

Returns:

  • (Boolean)


66
67
68
# File 'lib/zhongwen_tools/string.rb', line 66

# Reports whether the text contains at least one multibyte character; it is
# the negation of #ascii?.
#
# str - the text to inspect; the receiver is used when omitted.
#
# Returns true or false.
def multibyte?(str = nil)
  # Call this module's own ascii? with the text as an argument rather than
  # sending ascii? to the text, so a plain String that does not respond to
  # ascii? can be checked as well.
  !ascii?(str || self)
end

#reverse(str = nil) ⇒ Object



45
46
47
48
# File 'lib/zhongwen_tools/string.rb', line 45

# Reverses the text character by character.
#
# str - the text to reverse; the receiver is used when omitted.
#
# Returns a new String with the characters in reverse order.
def reverse(str = nil)
  text = str || self
  reversed_chars = text.chars.reverse
  reversed_chars.join
end

#size(str = nil) ⇒ Object



36
37
38
39
# File 'lib/zhongwen_tools/string.rb', line 36

# Counts the characters in the text.
#
# str - the text to measure; the receiver is used when omitted.
#
# Returns the number of characters.
def size(str = nil)
  text = str || self
  characters = text.chars
  characters.size
end

#to_codepoint(str = nil) ⇒ Object



92
93
94
95
96
97
98
# File 'lib/zhongwen_tools/string.rb', line 92

# Renders each character of the text as a "\uXXXX" escape, using lowercase
# hex zero-padded to at least four digits, and concatenates the escapes.
#
# str - the text to convert; the receiver is used when omitted.
#
# Returns a String of escapes.
def to_codepoint(str = nil)
  text = str || self
  escapes = text.chars.map do |char|
    codepoint = char.unpack("U").first
    "\\u%04x" % codepoint
  end
  escapes.join
end

#to_halfwidth(str = nil) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
# File 'lib/zhongwen_tools/string.rb', line 80

# Replaces fullwidth characters in the text with the values FW_HW holds for
# them.
#
# str - the text to convert; the receiver is used when omitted.
#
# Only fullwidth digits (U+FF10-FF19), fullwidth Latin letters (U+FF21-FF3A,
# U+FF41-FF5A) and the fullwidth forms of % . : # $ & + - / \ = ; < > are
# looked up. FW_HW is defined outside this method; presumably it maps each
# fullwidth character to its halfwidth equivalent (confirm against its
# definition).
#
# Returns a new String; the input is not modified.
def to_halfwidth(str = nil)
  str ||= self
  # Written as \u escapes so the class cannot be folded back to ASCII by
  # tools that normalize fullwidth characters.
  fullwidth_chars = /[\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF05\uFF0E\uFF1A\uFF03\uFF04\uFF06\uFF0B\uFF0D\uFF0F\uFF3C\uFF1D\uFF1B\uFF1C\uFF1E]/

  # One pass over the text; a character with no FW_HW entry is kept as is
  # instead of failing on a nil replacement.
  str.gsub(fullwidth_chars) { |fullwidth| FW_HW[fullwidth] || fullwidth }
end

#to_utf8(str = nil) ⇒ Object



13
14
15
16
# File 'lib/zhongwen_tools/string.rb', line 13

# Tags the text as UTF-8 via String#force_encoding. The bytes are not
# transcoded, and the string passed in (or the receiver) is changed in place.
#
# str - the text to re-tag; the receiver is used when omitted.
#
# Returns the same string object, now marked as UTF-8.
# TODO: better conversion functions available in categorize
def to_utf8(str = nil)
  target = str || self
  target.force_encoding('utf-8')
end

#uri_encode(str = nil) ⇒ Object



50
51
52
53
# File 'lib/zhongwen_tools/string.rb', line 50

# Percent-encodes the characters of the text that the URI library's default
# "unsafe" pattern matches.
#
# str - the text to encode; the receiver is used when omitted.
#
# Returns the encoded String.
def uri_encode(str = nil)
  str ||= self
  # URI.encode was removed in Ruby 3.0, so go through the parser object when
  # the loaded URI library provides one and keep the old call as a fallback.
  if defined?(URI::DEFAULT_PARSER)
    URI::DEFAULT_PARSER.escape(str)
  else
    URI.encode(str)
  end
end

#uri_escape(str = nil) ⇒ Object



55
56
57
58
59
# File 'lib/zhongwen_tools/string.rb', line 55

# Percent-encodes every character of the text that is not in
# URI::PATTERN::UNRESERVED, so reserved characters such as "/" and "&" are
# encoded as well.
#
# str - the text to escape; the receiver is used when omitted.
#
# Returns the escaped String.
def uri_escape(str = nil)
  str ||= self
  unsafe = Regexp.new("[^#{URI::PATTERN::UNRESERVED}]")

  # URI.escape was removed in Ruby 3.0, so go through the parser object when
  # the loaded URI library provides one and keep the old call as a fallback.
  if defined?(URI::DEFAULT_PARSER)
    URI::DEFAULT_PARSER.escape(str, unsafe)
  else
    URI.escape(str, unsafe)
  end
end

#zh?(str = nil) ⇒ Boolean

Returns:

  • (Boolean)


24
25
26
27
28
# File 'lib/zhongwen_tools/string.rb', line 24

# Reports whether the whole text consists only of runs of characters
# matched by UNICODE_REGEX[:zh], runs matched by UNICODE_REGEX[:punc], and
# whitespace. An empty string satisfies this.
#
# str - the text to inspect; the receiver is used when omitted.
#
# Returns true or false.
def zh?(str = nil)
  text = str || self
  allowed_runs = /(#{UNICODE_REGEX[:zh]}+|#{UNICODE_REGEX[:punc]}+|\s+)/
  recognized = text.scan(allowed_runs).join
  recognized == text
end