Module: Bayes::CHARSET::UTF8

Defined in:
lib/bayes.rb,
lib/bayes/convert.rb

Constant Summary

KCODE =
"u"
KATAKANA =
utf_range(0x30a0, 0x30ff)
BAR =
c2u(0x30fc)
KANJI =
utf_range(0x4e00, 0x9faf)
RE_KATAKANA =
/[#{KATAKANA}#{BAR}]{2,}/uo
RE_KANJI =
/[#{KANJI}]{2,}/uo
KCONV =
Kconv::UTF8

Class Method Summary

Class Method Details

.c2u(c) ⇒ Object



28
29
30
# File 'lib/bayes.rb', line 28

# Encodes a single Unicode code point as a UTF-8 string.
#
# @param c [Integer] the code point to encode (e.g. 0x30fc)
# @return [String] the UTF-8 string holding that one character
def self.c2u(c)
	codepoints = [c]
	# The "U" pack directive writes each integer as a UTF-8 character.
	codepoints.pack("U")
end

.utf_range(a, b) ⇒ Object



31
32
33
# File 'lib/bayes.rb', line 31

# Builds a "first-last" character range string from two code points,
# in the form used inside a regexp character class.
#
# @param a [Integer] code point of the first character in the range
# @param b [Integer] code point of the last character in the range
# @return [String] the two encoded characters joined by a hyphen
def self.utf_range(a, b)
	first_char = c2u(a)
	last_char = c2u(b)
	"#{first_char}-#{last_char}"
end