Module: Rucc::UTF
- Defined in:
- lib/rucc/utf.rb
Class Method Summary
- .count_leading_ones(c) ⇒ Integer
- .read_rune(s) ⇒ <Integer, <Integer>>
- .to_utf16(str) ⇒ String
- .to_utf32(str) ⇒ String
-
.write16(b, rune) ⇒ Object
@param(return) [String] b.
-
.write32(b, rune) ⇒ Object
@param(return) [String] b.
-
.write_utf8(b, rune) ⇒ Object
@param(return) [String] b.
Class Method Details
.count_leading_ones(c) ⇒ Integer
116 117 118 119 120 121 122 123 |
# File 'lib/rucc/utf.rb', line 116

# Counts how many consecutive bits are set in +c+, starting at bit 7 and
# moving towards bit 0. Bits above bit 7 are ignored.
#
# @param c [Integer] value whose low eight bits are inspected
# @return [Integer] number of leading one bits, from 0 to 8
def count_leading_ones(c)
  # Position (counted from the top bit) of the first cleared bit; when every
  # one of the eight bits is set there is no such position and the count is 8.
  (0..7).find { |n| (c & (0x80 >> n)).zero? } || 8
end
.read_rune(s) ⇒ <Integer, <Integer>>
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/rucc/utf.rb', line 82

# Decodes one UTF-8 encoded code point from the front of +s+.
#
# @param s [<Integer>] byte values, first byte of the sequence at index 0
# @return [<Integer, <Integer>>] the decoded code point and the remaining bytes
# @raise [RuntimeError] when the sequence is truncated, a continuation byte
#   does not match 10xxxxxx, or the lead byte announces an unsupported length
def read_rune(s)
  lead = s[0]
  len = count_leading_ones(lead)

  # No leading one bit: the byte is the code point itself.
  return [lead, s[1..-1]] if len.zero?

  raise "invalid UTF-8 sequence" if len > s.size # error("invalid UTF-8 sequence");

  # Continuation bytes are validated before the length is dispatched on, so
  # a bad continuation byte is reported in preference to a bad length.
  tail = s[1, len - 1]
  unless tail.all? { |byte| (byte & 0xC0) == 0x80 }
    raise "invalid UTF-8 continuation byte" # error("invalid UTF-8 continuation byte");
  end

  # Payload bits carried by the lead byte for each supported sequence length.
  lead_mask =
    case len
    when 2 then 0x1F
    when 3 then 0xF
    when 4 then 0x7
    else raise "invalid UTF-8 sequence" # error("invalid UTF-8 sequence");
    end

  # Each continuation byte contributes its low six bits.
  rune = tail.reduce(lead & lead_mask) { |acc, byte| (acc << 6) | (byte & 0x3F) }
  [rune, s[len..-1]]
end
.to_utf16(str) ⇒ String
39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/rucc/utf.rb', line 39

# Re-encodes the UTF-8 bytes of +str+ as UTF-16 code units, each written
# through write16.
#
# @param str [String] UTF-8 encoded input
# @return [String] binary (ASCII-8BIT) string holding the UTF-16 bytes
def to_utf16(str)
  # The buffer must be binary: String#<< with an Integer appends a character
  # in the receiver's encoding, so on a UTF-8 buffer every byte value >= 0x80
  # would be stored as two UTF-8 bytes instead of one raw byte.
  b = "".b
  bytes = str.bytes
  until bytes.empty?
    rune, bytes = read_rune(bytes)
    if rune < 0x10000
      write16(b, rune)
    else
      # Surrogate pair. 0xD7C0 is 0xD800 - (0x10000 >> 10), so the first unit
      # equals 0xD800 + ((rune - 0x10000) >> 10).
      write16(b, (rune >> 10) + 0xD7C0)
      write16(b, (rune & 0x3FF) + 0xDC00)
    end
  end
  b
end
.to_utf32(str) ⇒ String
56 57 58 59 60 61 62 63 64 |
# File 'lib/rucc/utf.rb', line 56

# Re-encodes the UTF-8 bytes of +str+ as UTF-32, writing each decoded code
# point through write32.
#
# @param str [String] UTF-8 encoded input
# @return [String] binary (ASCII-8BIT) string holding the UTF-32 bytes
def to_utf32(str)
  # The buffer must be binary: String#<< with an Integer appends a character
  # in the receiver's encoding, so on a UTF-8 buffer every byte value >= 0x80
  # would be stored as two UTF-8 bytes instead of one raw byte.
  b = "".b
  bytes = str.bytes
  until bytes.empty?
    rune, bytes = read_rune(bytes)
    write32(b, rune)
  end
  b
end
.write16(b, rune) ⇒ Object
@param(return) [String] b
68 69 70 71 |
# File 'lib/rucc/utf.rb', line 68

# Appends the low 16 bits of +rune+ to +b+ as two raw bytes, least
# significant byte first.
#
# String#<< with an Integer appends a character in the receiver's encoding,
# so on a UTF-8 buffer a value such as 0xE9 would be stored as the two bytes
# C3 A9. The buffer is therefore retagged as binary and a packed byte string
# is appended instead.
#
# @param b [String] output buffer; mutated in place and retagged as ASCII-8BIT
# @param rune [Integer] value whose low 16 bits are written
# @return [String] b
def write16(b, rune)
  b.force_encoding(Encoding::BINARY)
  # "v" packs an unsigned 16-bit integer in little-endian byte order.
  b << [rune & 0xFFFF].pack("v")
end
.write32(b, rune) ⇒ Object
@param(return) [String] b
75 76 77 78 |
# File 'lib/rucc/utf.rb', line 75

# Appends the low 32 bits of +rune+ to +b+ as four raw bytes, least
# significant byte first.
#
# String#<< with an Integer appends a character in the receiver's encoding,
# so byte values >= 0x80 would be stored as two bytes on a UTF-8 buffer. The
# buffer is therefore retagged as binary and a packed byte string is
# appended instead.
#
# @param b [String] output buffer; mutated in place and retagged as ASCII-8BIT
# @param rune [Integer] value whose low 32 bits are written
# @return [String] b
def write32(b, rune)
  b.force_encoding(Encoding::BINARY)
  # "V" packs an unsigned 32-bit integer in little-endian byte order, which is
  # the same byte sequence as the low 16-bit half followed by the high half.
  b << [rune & 0xFFFFFFFF].pack("V")
end
.write_utf8(b, rune) ⇒ Object
@param(return) [String] b
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/rucc/utf.rb', line 6

# Appends the code point +rune+ to +b+.
#
# An Integer argument to String#concat is appended as a character in the
# receiver's encoding, so no manual construction of the one- to four-byte
# UTF-8 sequence (as in the C original, see error("invalid UCS character"))
# is needed here.
# NOTE(review): this yields UTF-8 bytes only when +b+ is UTF-8 encoded --
# confirm against callers.
#
# @param b [String] output buffer, mutated in place
# @param rune [Integer] code point to append
# @return [String] b
def write_utf8(b, rune)
  b.concat(rune)
end