Module: Translatomatic::EscapedUnicode

Defined in:
lib/translatomatic/escaped_unicode.rb

Overview

Module to encode and decode unicode chars. This code is heavily influenced by Florian Frank’s JSON gem.

Class Method Summary collapse

Class Method Details

.escape(text) ⇒ String

Encodes all unicode chars as escape sequences

Parameters:

Returns:

  • (String)

    The encoded text for chaining



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/translatomatic/escaped_unicode.rb', line 32

# Rewrites +text+ in place so that every multi-byte UTF-8 character is
# replaced by \uXXXX escape sequences (one per UTF-16 code unit, lowercase
# hex). Quotes, backslashes and control bytes are substituted through MAP
# when MAP has an entry for them, and are otherwise left as they are.
#
# @param text [String] mutable string to escape; its contents are replaced
# @return [String] +text+ itself, UTF-8 encoded, for chaining
# @raise [RuntimeError] when a byte that can never occur in UTF-8 is found;
#   the message comes from t("unicode.invalid_byte", ...) (assumes +t+
#   returns a String - confirm against its definition)
def self.escape(text)
  # Work on raw bytes so the byte-range regexes below apply.
  raw = text.dup.force_encoding(::Encoding::ASCII_8BIT)

  raw.gsub!(/["\\\x0-\x1f]/n) { |byte| MAP[byte] || byte }

  raw.gsub!(/(
    (?:
      [\xc2-\xdf][\x80-\xbf]    |
      [\xe0-\xef][\x80-\xbf]{2} |
      [\xf0-\xf4][\x80-\xbf]{3}
      )+ |
      [\x80-\xc1\xf5-\xff]       # invalid
      )/nx) do |run|
    # Well-formed runs are at least two bytes long, so a single-byte match
    # can only have come from the "invalid" alternative.
    raise t("unicode.invalid_byte", byte: run) if run.size == 1

    # Transcode to UTF-16BE and hex-dump it: every 4 hex digits are one
    # code unit, which is then prefixed with \u.
    hex = run.encode("utf-16be", "utf-8").unpack('H*').first
    hex.force_encoding(::Encoding::ASCII_8BIT)
       .gsub(/.{4}/n, '\\\\u\&')
       .force_encoding(::Encoding::UTF_8)
  end

  text.replace(raw.force_encoding(::Encoding::UTF_8))
end

.unescape(text) ⇒ String

Decodes all unicode chars from escape sequences

Parameters:

Returns:

  • (String)

    The decoded text for chaining



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/translatomatic/escaped_unicode.rb', line 11

# Replaces every run of \uXXXX escape sequences in +text+ with the
# characters they encode. +text+ is modified in place.
#
# Each escape is read as one big-endian UTF-16 code unit, so two adjacent
# escapes forming a surrogate pair decode to a single character. Both the
# \u and \U prefixes and upper- or lowercase hex digits are accepted.
# Anything that is not a backslash, u/U and exactly four hex digits is left
# untouched.
#
# @param text [String] mutable string that may contain escape sequences
# @return [String] +text+ itself, UTF-8 encoded, for chaining
# @raise [Encoding::InvalidByteSequenceError] when a run of escapes is not
#   valid UTF-16 (e.g. an unpaired surrogate); raised by String#encode
def self.unescape(text)
  # gsub (non-bang) already builds a new string, so no explicit copy of
  # +text+ is needed before the final replace.
  decoded = text.gsub(/(?:\\[uU][A-Fa-f\d]{4})+/) do |run|
    # The backslash and u/U separators are not hex digits, so scanning for
    # groups of four hex digits yields exactly one group per escape.
    code_units = run.scan(/[A-Fa-f\d]{4}/).map { |hex| hex.to_i(16) }

    # 'n*' packs each code unit as 16-bit big-endian, i.e. UTF-16BE bytes.
    code_units.pack('n*').encode("utf-8", "utf-16be")
  end

  text.replace(decoded.force_encoding(::Encoding::UTF_8))
end