Module: TingYun::Support::Serialize::EncodingNormalizer::EncodingNormalizer

Defined in:
lib/ting_yun/support/serialize/encoding_normalizer.rb

Class Method Summary collapse

Class Method Details

.normalize(str) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/ting_yun/support/serialize/encoding_normalizer.rb', line 48

# Returns a string whose encoding label is either UTF-8 or ISO-8859-1.
#
# A string already tagged UTF-8 or ISO-8859-1 with valid bytes is handed
# back untouched (the very same object). In every other case a duplicate is
# made and the caller's string is left as it was.
#
# @param str [String] the string to normalize
# @return [String] +str+ itself, or a re-encoded / re-labelled duplicate
def self.normalize(str)
  source_encoding = str.encoding
  well_formed = str.valid_encoding?

  already_normal = [Encoding::UTF_8, Encoding::ISO_8859_1].include?(source_encoding)
  return str if already_normal && well_formed

  copy = str.dup

  # Binary (ASCII-8BIT) data and strings with invalid byte sequences are not
  # expected to transcode to UTF-8, so just relabel the bytes as ISO-8859-1,
  # which keeps every byte as-is.
  if source_encoding == Encoding::ASCII_8BIT || !well_formed
    return copy.force_encoding(Encoding::ISO_8859_1)
  end

  # Valid, non-binary text: attempt a real transcode to UTF-8 and, if that
  # raises, fall back to the byte-preserving ISO-8859-1 label.
  begin
    copy.encode!(Encoding::UTF_8)
  rescue
    copy.force_encoding(Encoding::ISO_8859_1)
  end
  copy
end