Module: NewRelic::Agent::EncodingNormalizer::EncodingNormalizer

Defined in:
lib/new_relic/agent/encoding_normalizer.rb

Class Method Summary collapse

Class Method Details

.normalize(raw_string) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/new_relic/agent/encoding_normalizer.rb', line 42

# Produces a string that is tagged as either UTF-8 or ISO-8859-1.
#
# * Valid UTF-8 or ISO-8859-1 input is returned untouched (the very same
#   object, no copy is made).
# * ASCII-8BIT input, or input whose bytes are invalid for its declared
#   encoding, is duplicated and relabelled as ISO-8859-1 without altering
#   any bytes.
# * Any other validly-encoded input is duplicated and transcoded to UTF-8;
#   if transcoding raises, the duplicate is relabelled as ISO-8859-1.
#
# The argument itself is never mutated.
#
# @param raw_string [String] the string to normalize
# @return [String] +raw_string+ itself, or a normalized duplicate of it
def self.normalize(raw_string)
  source_encoding = raw_string.encoding

  # Fast path: already in one of the two target encodings and well-formed.
  case source_encoding
  when Encoding::UTF_8, Encoding::ISO_8859_1
    return raw_string if raw_string.valid_encoding?
  end

  copy = raw_string.dup

  # Binary data and malformed strings are unlikely to transcode cleanly, so
  # they are only relabelled; ISO-8859-1 accepts every byte value as-is.
  transcodable = source_encoding != Encoding::ASCII_8BIT && raw_string.valid_encoding?
  return copy.force_encoding(Encoding::ISO_8859_1) unless transcodable

  # Well-formed, non-binary text: attempt a real conversion to UTF-8 and
  # fall back to the byte-preserving relabel if the conversion raises.
  begin
    copy.encode!(Encoding::UTF_8)
  rescue StandardError
    copy.force_encoding(Encoding::ISO_8859_1)
  end
  copy
end