Class: ANSEL::Converter
- Inherits:
-
Object
- Object
- ANSEL::Converter
- Includes:
- CharacterMap
- Defined in:
- lib/ansel/converter.rb
Constant Summary
Constants included from CharacterMap
ANSEL::CharacterMap::ANSI_TO_UTF16_MAP, ANSEL::CharacterMap::COMBINING, ANSEL::CharacterMap::NON_COMBINING
Instance Method Summary
- #convert(string) ⇒ Object
-
#initialize(to_charset = 'UTF-8') ⇒ Converter
constructor
A new instance of Converter.
- #utf16_to_utf8(string) ⇒ Object
Constructor Details
#initialize(to_charset = 'UTF-8') ⇒ Converter
Returns a new instance of Converter.
7 8 9 10 |
# File 'lib/ansel/converter.rb', line 7
#
# Sets up the converter state: an Encoding::Converter that transcodes
# UTF-16BE input into UTF-8, plus the requested target charset name.
#
# NOTE(review): +to_charset+ is only stored in @to_charset here; the
# Encoding::Converter is always built with a 'UTF-8' destination, so a
# non-default value does not change the transcoding target. Confirm whether
# that is intended.
#
# @param to_charset [String] name of the desired output charset
#   (defaults to 'UTF-8')
def initialize(to_charset = 'UTF-8')
  @encoding_converter = Encoding::Converter.new('UTF-16BE', 'UTF-8')
  @to_charset = to_charset
end
Instance Method Details
#convert(string) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/ansel/converter.rb', line 16
#
# Converts +string+ byte by byte, replacing each non-ASCII byte (or byte
# sequence) with the text obtained by looking it up in ANSI_TO_UTF16_MAP and
# passing the mapped value through #utf16_to_utf8.
#
# Map keys are uppercase hex byte values joined with '+' (for example 'A1'
# or 'E1+61'); the 'ERR' key holds the replacement used for bytes that cannot
# be mapped.
#
# Dispatch by byte value:
# * 0x00..0x7F - copied to the output unchanged.
# * 0x88..0xC8 - looked up on its own ('ERR' when unmapped); a NUL byte
#   directly after it is skipped.
# * 0xE0..0xFB - looked up together with the following two, one, then zero
#   bytes (longest key first); 'ERR' when no key matches.
# * anything else - emits 'ERR' and discards the following byte(s), see the
#   note in the +else+ branch.
#
# @param string [String] the bytes to convert (presumably ANSEL-encoded,
#   going by the class name - confirm against callers)
# @return [String] the converted text
def convert(string)
  # dup so the buffer is mutable even when string literals are frozen.
  output = ''.dup
  scanner = StringScanner.new(string)
  until scanner.eos?
    byte = scanner.get_byte
    char = byte.getbyte(0)
    char_hex = char.to_s(16).upcase
    case char
    when 0x00..0x7F
      output << byte
    when 0x88..0xC8
      output << utf16_to_utf8(ANSI_TO_UTF16_MAP[char_hex] || ANSI_TO_UTF16_MAP['ERR'])
      # Drop a NUL byte that immediately follows, if there is one.
      scanner.skip(/\00/)
    when 0xE0..0xFB
      # Longest key first: lead byte plus two, one, then zero following bytes.
      matched = [2, 1, 0].any? do |n|
        bytes = [char_hex]
        scanner.peek(n).each_byte { |b| bytes << b.to_s(16).upcase }
        hex_key = bytes.join('+')
        next false unless ANSI_TO_UTF16_MAP.key?(hex_key)

        output << utf16_to_utf8(ANSI_TO_UTF16_MAP[hex_key])
        n.times { scanner.get_byte }
        true
      end
      # Do not drop the byte silently when the map has no entry for it.
      output << utf16_to_utf8(ANSI_TO_UTF16_MAP['ERR']) unless matched
    else
      output << utf16_to_utf8(ANSI_TO_UTF16_MAP['ERR'])
      # The byte after an unmappable one is always discarded, and when that
      # byte is >= 0xE0 the one after it is discarded too. get_byte returns
      # nil at end of input, so guard before inspecting it.
      # NOTE(review): the unconditional discard is kept from the original
      # code ("ignore the next byte"); confirm it is the intended recovery.
      skipped = scanner.get_byte
      scanner.get_byte if skipped && skipped.getbyte(0) >= 0xE0
    end
  end
  output
end
#utf16_to_utf8(string) ⇒ Object
12 13 14 |
# File 'lib/ansel/converter.rb', line 12
#
# Transcodes +string+ by handing it to the Encoding::Converter held in
# @encoding_converter and returns whatever that converter produces.
#
# @param string [String] bytes to feed to the converter
# @return [String] the converter's output
def utf16_to_utf8(string)
  converter = @encoding_converter
  converter.convert(string)
end