Module: REXML::Encoding
- Defined in:
- lib/rexml/encoding.rb,
lib/rexml/encodings/ICONV.rb,
lib/rexml/encodings/UNILE.rb,
lib/rexml/encodings/UTF-8.rb,
lib/rexml/encodings/EUC-JP.rb,
lib/rexml/encodings/CP-1252.rb,
lib/rexml/encodings/US-ASCII.rb,
lib/rexml/encodings/SHIFT-JIS.rb,
lib/rexml/encodings/ISO-8859-1.rb,
lib/rexml/encodings/ISO-8859-15.rb
Constant Summary collapse
- UTF_8 = 'UTF-8'
Native, default format is UTF-8, so it is declared here rather than in an encodings/ definition.
- UTF_16 = 'UTF-16'
- UNILE = 'UNILE'
- @@__REXML_encoding_methods =
%q~
  # Convert from UTF-8 to ISO-8859-15 (Latin-9).
  #
  # content:: UTF-8 text; it is split into code points with unpack('U*').
  # Returns a string built with pack('C*'), one byte per representable
  # character. Code points that have no ISO-8859-15 byte are written as
  # numeric entities (&#nnnn;).
  def to_iso_8859_15(content)
    array_utf8 = content.unpack('U*')
    array_enc = []
    array_utf8.each do |num|
      case num
      # shortcut first bunch basic characters
      when 0..0xA3 then array_enc << num
      # characters removed compared to iso-8859-1: their byte slots are
      # reused below, so they can only be emitted as numeric entities.
      # (Pushing the literal character here would put a String into an
      # array that is packed with 'C*', which raises TypeError.)
      when 0xA4, 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD, 0xBE
        array_enc.concat "&\##{num};".unpack('C*')
      # characters added compared to iso-8859-1
      when 0x20AC then array_enc << 0xA4 # 0xe2 0x82 0xac
      when 0x0160 then array_enc << 0xA6 # 0xc5 0xa0
      when 0x0161 then array_enc << 0xA8 # 0xc5 0xa1
      when 0x017D then array_enc << 0xB4 # 0xc5 0xbd
      when 0x017E then array_enc << 0xB8 # 0xc5 0xbe
      when 0x0152 then array_enc << 0xBC # 0xc5 0x92
      when 0x0153 then array_enc << 0xBD # 0xc5 0x93
      when 0x0178 then array_enc << 0xBE # 0xc5 0xb8
      else
        # all remaining basic characters can be used directly
        if num <= 0xFF
          array_enc << num
        else
          # Numeric entity (&#nnnn;); credit: Stefan Scholl
          array_enc.concat "&\##{num};".unpack('C*')
        end
      end
    end
    array_enc.pack('C*')
  end

  # Convert from ISO-8859-15 (Latin-9) to UTF-8.
  #
  # str:: ISO-8859-15 bytes; read one byte at a time with unpack('C*').
  # Returns the UTF-8 string produced by pack('U*').
  def from_iso_8859_15(str)
    array_latin9 = str.unpack('C*')
    array_enc = []
    array_latin9.each do |num|
      case num
      # characters that differ compared to iso-8859-1
      when 0xA4 then array_enc << 0x20AC
      when 0xA6 then array_enc << 0x0160
      when 0xA8 then array_enc << 0x0161
      when 0xB4 then array_enc << 0x017D
      when 0xB8 then array_enc << 0x017E
      when 0xBC then array_enc << 0x0152
      when 0xBD then array_enc << 0x0153
      when 0xBE then array_enc << 0x0178
      else
        array_enc << num
      end
    end
    array_enc.pack('U*')
  end
~
Instance Attribute Summary collapse
-
#encoding ⇒ Object
ID —> Encoding name.
Class Method Summary collapse
Instance Method Summary collapse
- #check_encoding(str) ⇒ Object
-
#decode_ascii(str) ⇒ Object
Convert to UTF-8.
- #decode_iconv(str) ⇒ Object
- #decode_unile(str) ⇒ Object
- #decode_utf8(str) ⇒ Object
-
#encode_ascii(content) ⇒ Object
Convert from UTF-8.
- #encode_iconv(content) ⇒ Object
- #encode_unile(content) ⇒ Object
- #encode_utf8(content) ⇒ Object
Instance Attribute Details
#encoding ⇒ Object
ID —> Encoding name
22 23 24 |
# File 'lib/rexml/encoding.rb', line 22
# Reader for the value held in @encoding (described above as the encoding
# name). Returns nil when @encoding has not been assigned.
def encoding
  return @encoding
end
Class Method Details
.apply(obj, enc) ⇒ Object
8 9 10 |
# File 'lib/rexml/encoding.rb', line 8
# Fetches the entry stored under +enc+ in @encoding_methods and invokes it
# with +obj+ through #[]; the result of that call is returned.
def self.apply(obj, enc)
  handler = @encoding_methods[enc]
  handler[obj]
end
.encoding_method(enc) ⇒ Object
11 12 13 |
# File 'lib/rexml/encoding.rb', line 11
# Returns whatever is stored under +enc+ in @encoding_methods, without
# invoking it.
def self.encoding_method(enc)
  registry = @encoding_methods
  registry[enc]
end
.register(enc, &block) ⇒ Object
5 6 7 |
# File 'lib/rexml/encoding.rb', line 5
# Stores the given block under +enc+ in @encoding_methods, replacing any
# entry already stored under that key. Returns the stored block.
def self.register(enc, &block)
  registry = @encoding_methods
  registry[enc] = block
end
Instance Method Details
#check_encoding(str) ⇒ Object
57 58 59 60 61 62 63 64 |
# File 'lib/rexml/encoding.rb', line 57
# Guesses the encoding of +str+ from its first bytes.
#
# Returns UTF_16 when +str+ starts with the bytes FE FF, UNILE when it
# starts with FF FE, the upcased name from an XML declaration's
# encoding="..." attribute when one is present, and UTF_8 otherwise.
def check_encoding(str)
  # We have to recognize UTF-16, LSB UTF-16, and UTF-8
  return UTF_16 if /\A\xfe\xff/n =~ str
  return UNILE if /\A\xff\xfe/n =~ str
  # Group 1 is the quote around the version value, group 2 the quote around
  # the encoding value, group 3 the encoding name. "\?" matches the literal
  # question mark of "<?xml", and each value is closed by the same quote
  # character that opened it.
  str =~ /^\s*<\?xml\s*version\s*=\s*(['"]).*?\1\s*encoding\s*=\s*(["'])(.*?)\2/um
  return $3.upcase if $3
  UTF_8
end
#decode_ascii(str) ⇒ Object
Convert to UTF-8
19 20 21 |
# File 'lib/rexml/encodings/US-ASCII.rb', line 19
# Convert to UTF-8: every byte of +str+ is read as an unsigned 8-bit value
# and written back out as the code point with that number.
def decode_ascii(str)
  byte_values = str.unpack('C*')
  byte_values.pack('U*')
end
#decode_iconv(str) ⇒ Object
6 7 8 |
# File 'lib/rexml/encodings/ICONV.rb', line 6
# Converts +str+ from the charset named by @encoding to UTF_8 by delegating
# to Iconv.conv, and returns the converted string.
def decode_iconv(str)
  source_charset = @encoding
  Iconv.conv(UTF_8, source_charset, str)
end
#decode_unile(str) ⇒ Object
18 19 20 21 22 23 24 25 |
# File 'lib/rexml/encodings/UNILE.rb', line 18
# Convert to UTF-8: +str+ is read as a sequence of unsigned 16-bit
# little-endian integers, and each integer is written out as the code point
# with that number (16-bit units are not combined into surrogate pairs).
#
# A dangling final byte in odd-length input is ignored rather than raising
# NoMethodError on the missing high byte.
def decode_unile(str)
  # 'v' = 16-bit unsigned, little-endian; 'v*' stops when fewer than two
  # bytes remain.
  str.unpack('v*').pack('U*')
end
#decode_utf8(str) ⇒ Object
7 8 9 |
# File 'lib/rexml/encodings/UTF-8.rb', line 7
# Identity conversion: hands back +str+ itself, not a copy.
def decode_utf8(str)
  return str
end
#encode_ascii(content) ⇒ Object
Convert from UTF-8
4 5 6 7 8 9 10 11 12 13 14 15 16 |
# File 'lib/rexml/encodings/US-ASCII.rb', line 4
# Convert from UTF-8: code points up to 0x7F are emitted as single bytes;
# anything above is written as a numeric entity (&#nnnn;).
# Returns the string produced by pack('C*').
def encode_ascii(content)
  bytes = content.unpack('U*').inject([]) do |acc, code_point|
    if code_point > 0x7F
      # Numeric entity (&#nnnn;); credit: Stefan Scholl
      acc.concat("&\##{code_point};".unpack('C*'))
    else
      acc << code_point
    end
  end
  bytes.pack('C*')
end
#encode_iconv(content) ⇒ Object
10 11 12 |
# File 'lib/rexml/encodings/ICONV.rb', line 10
# Converts +content+ from UTF_8 to the charset named by @encoding by
# delegating to Iconv.conv, and returns the converted string.
def encode_iconv(content)
  target_charset = @encoding
  Iconv.conv(target_charset, UTF_8, content)
end
#encode_unile(content) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
# File 'lib/rexml/encodings/UNILE.rb', line 3
# Convert from UTF-8: each code point of +content+ is written as one
# unsigned 16-bit little-endian unit (low byte first). Code points above
# 0xFFFF do not fit in one unit and are replaced by a question mark.
# Returns the string produced by pack('v*').
def encode_unile(content)
  units = content.unpack('U*').map do |num|
    # 0x3F is '?'. It is spelled as an integer because the character
    # literal ?? is a String on Ruby 1.9+, which cannot be packed as a
    # number.
    num > 0xFFFF ? 0x3F : num
  end
  units.pack('v*')
end
#encode_utf8(content) ⇒ Object
3 4 5 |
# File 'lib/rexml/encodings/UTF-8.rb', line 3
# Identity conversion: hands back +content+ itself, not a copy.
def encode_utf8(content)
  return content
end