Class: UCSCodepoint

Inherits:
Integer
  • Object
show all
Defined in:
lib/unicode_madness/ucs_codepoint.rb

Instance Method Summary collapse

Instance Method Details

#inspect ⇒ Object



54
55
56
# File 'lib/unicode_madness/ucs_codepoint.rb', line 54

# Builds a debugging representation of this codepoint: the receiver's class
# name, the numeric value rendered in base 16, and the inspected form of the
# string produced by #to_s.
#
# @return [String] text shaped like +#<ClassName:0x3042 "...">+
def inspect
  hex_value = to_i.to_s(16)
  rendered = to_s.inspect
  format('#<%s:0x%s %s>', self.class, hex_value, rendered)
end

#kana? ⇒ Boolean

Returns a Boolean indicating whether this UCS codepoint represents a hiragana or katakana character.

Returns:

  • (Boolean)


14
15
16
17
# File 'lib/unicode_madness/ucs_codepoint.rb', line 14

# Tells whether this codepoint represents a hiragana or katakana character.
#
# Two inclusive ranges are accepted: U+3040..U+30FF and U+31F0..U+31FF.
#
# @return [Boolean] true when the value lies in either range
def kana?
  return true if between?(0x3040, 0x30ff)

  between?(0x31f0, 0x31ff)
end

#kanji? ⇒ Boolean

Returns a Boolean indicating whether this UCS codepoint represents a kanji character.

Returns:

  • (Boolean)


6
7
8
9
10
# File 'lib/unicode_madness/ucs_codepoint.rb', line 6

# Tells whether this codepoint represents a kanji character.
#
# Each range below spans a whole Unicode block. The main block previously
# stopped at U+9FBF, which wrongly rejected ideographs in U+9FC0..U+9FFF;
# it now runs to the end of the block like the two extension ranges.
#
# @return [Boolean] true when the value lies in one of the ideograph blocks
def kanji?
  between?(0x4e00, 0x9fff) ||   # CJK Unified Ideographs
    between?(0x3400, 0x4dbf) || # CJK Unified Ideographs Extension A
    between?(0x20000, 0x2a6df)  # CJK Unified Ideographs Extension B
end

#to_s ⇒ Object

Returns an encoded string containing the character represented by this UCS codepoint. Currently only UTF-8 encoding is supported.



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/unicode_madness/ucs_codepoint.rb', line 27

# Returns a string containing the character represented by this UCS
# codepoint, encoded as UTF-8 (the only supported encoding).
#
# The previous implementation assembled the bytes by hand with
# <tt>ch[i] = Integer</tt> and always consulted <tt>$KCODE</tt>. On Ruby 1.9
# and later <tt>$KCODE</tt> is always nil and String#[]= rejects Integers,
# so the method could only raise there. Array#pack's +U+ directive performs
# the same UTF-8 encoding on old and new Rubies alike.
#
# NOTE(review): values outside the Unicode range are passed to pack
# unchecked; its handling of them varies by Ruby version -- confirm whether
# callers need an explicit range check.
#
# @return [String] the UTF-8 encoded character
# @raise [ArgumentError] on Rubies without String encodings (1.8) when
#   <tt>$KCODE</tt> is not set to UTF-8
def to_s
  # Only Ruby 1.8 lacks String#encoding; $KCODE is meaningful there alone.
  legacy_ruby = !String.method_defined?(:encoding)
  if legacy_ruby && $KCODE !~ /^u/i
    raise ArgumentError, 'unrecognized encoding (only UTF-8 is supported at the moment)'
  end

  [to_i].pack('U')
end

#wide_latin? ⇒ Boolean

Returns a Boolean indicating whether this UCS codepoint represents a full-width Latin character.

Returns:

  • (Boolean)


21
22
23
# File 'lib/unicode_madness/ucs_codepoint.rb', line 21

# Tells whether this codepoint represents a full-width latin character,
# i.e. whether its value lies in the inclusive range U+FF10..U+FF5A.
#
# @return [Boolean] true when the value is inside that range
def wide_latin?
  between?(0xff10, 0xff5a)
end