Module: JavaProperties::Encoding::Unicode

Defined in:
lib/java-properties/encoding/unicode.rb

Overview

Module to encode and decode Unicode characters. This code is highly influenced by Florian Frank’s JSON gem.

Constant Summary collapse

# Lookup table from a single control character (given as a one-character
# String) to the six-character `\uXXXX` escape text that encode! substitutes
# for it.
# 0x08, 0x09, 0x0a, 0x0c and 0x0d have no entry, so encode! passes those
# characters through unchanged.
# NOTE(review): presumably those five are escaped by another part of the
# library (they have short forms such as \t and \n) — confirm.
MAP =
{
  "\x0" => '\u0000',
  "\x1" => '\u0001',
  "\x2" => '\u0002',
  "\x3" => '\u0003',
  "\x4" => '\u0004',
  "\x5" => '\u0005',
  "\x6" => '\u0006',
  "\x7" => '\u0007',
  "\xb" => '\u000b',
  "\xe" => '\u000e',
  "\xf" => '\u000f',
  "\x10" => '\u0010',
  "\x11" => '\u0011',
  "\x12" => '\u0012',
  "\x13" => '\u0013',
  "\x14" => '\u0014',
  "\x15" => '\u0015',
  "\x16" => '\u0016',
  "\x17" => '\u0017',
  "\x18" => '\u0018',
  "\x19" => '\u0019',
  "\x1a" => '\u001a',
  "\x1b" => '\u001b',
  "\x1c" => '\u001c',
  "\x1d" => '\u001d',
  "\x1e" => '\u001e',
  "\x1f" => '\u001f',
}
# Empty string that serves as a template: take a `dup` and append to the
# copy, so that this constant itself stays empty.
# NOTE(review): the name promises an ASCII-8BIT (binary) string, but a bare
# '' literal takes the source file's encoding. Confirm the encoding is forced
# elsewhere before appending raw byte values to a copy.
EMPTY_8BIT_STRING =
''

Class Method Summary collapse

Class Method Details

.decode!(text) ⇒ String

Decodes all unicode chars from escape sequences in place

Parameters:

  • text (String)

Returns:

  • (String)

    The decoded text for chaining



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/java-properties/encoding/unicode.rb', line 46

# Decodes every run of `\uXXXX` (or `\UXXXX`) escape sequences in +text+ into
# the characters they denote, modifying +text+ in place.
#
# Consecutive escapes are converted together as one UTF-16BE buffer, so a
# surrogate pair written as two escapes becomes a single character.
#
# @param text [String] text containing unicode escapes; mutated in place
# @return [String] the same +text+ object, now decoded, for chaining
def self.decode!(text)
  string = text.gsub(%r((?:\\[uU](?:[A-Fa-f\d]{4}))+)) do |escapes|
    # Within a matched run only the four digits of each escape are hex
    # characters (the backslash and the u/U marker are not), so scanning for
    # groups of four hex digits yields exactly one 16-bit code unit per escape.
    code_units = escapes.scan(/[A-Fa-f\d]{4}/).map(&:hex)

    # 'n' packs each value as a big-endian 16-bit integer, i.e. UTF-16BE.
    # Packing always produces raw bytes, so the result does not depend on the
    # encoding of any shared buffer string.
    code_units.pack('n*').encode("utf-8", "utf-16be")
  end
  string.force_encoding(::Encoding::UTF_8)

  text.replace string
  text
end

.encode!(text) ⇒ String

Encodes all unicode chars into escape sequences in place

Parameters:

  • text (String)

Returns:

  • (String)

    The encoded text for chaining



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/java-properties/encoding/unicode.rb', line 67

# Rewrites +text+ in place so that every multi-byte UTF-8 character is
# replaced by `\uXXXX` escapes (one per UTF-16 code unit) and every control
# character that has an entry in MAP is replaced by that entry.
#
# @param text [String] text to escape; mutated in place
# @return [String] the same +text+ object, now escaped, for chaining
# @raise [RuntimeError] when a lone byte that cannot be part of a UTF-8
#   sequence is encountered
def self.encode!(text)
  # Operate on a binary copy so the /n patterns below match raw bytes.
  raw = text.dup
  raw.force_encoding(::Encoding::ASCII_8BIT)

  # Characters without a MAP entry (including `"` and `\`) stay as they are.
  raw.gsub!(/["\\\x0-\x1f]/n) do |byte|
    MAP[byte] || byte
  end

  raw.gsub!(/(
    (?:
     [\xc2-\xdf][\x80-\xbf]    |
     [\xe0-\xef][\x80-\xbf]{2} |
     [\xf0-\xf4][\x80-\xbf]{3}
    )+ |
    [\x80-\xc1\xf5-\xff]       # invalid
  )/nx) do |run|
    # Only the "invalid" alternative can produce a one-byte match.
    raise "Invalid utf8 byte: '#{run}'" if run.size == 1

    # Hex dump of the UTF-16BE form: every four hex digits are one code unit.
    hex = run.encode("utf-16be", "utf-8").unpack('H*').first
    hex.force_encoding(::Encoding::ASCII_8BIT)
    hex.gsub!(/.{4}/n, '\\\\u\&')
    hex.force_encoding(::Encoding::UTF_8)
  end

  raw.force_encoding(::Encoding::UTF_8)
  text.replace raw
  text
end