Module: FaradayJSON::Encoding

Included in:: EncodeJson, ParseJson

Defined in:: lib/faraday_json/encoding.rb

Overview

Character encoding helper functions

Instance Method Summary collapse

#bin_to_hex(data) ⇒ Object

Helper function for testing.
#get_bom(enc) ⇒ Object

Given a (canonical) encoding, returns a BOM as an array of byte values.
#get_canonical_encoding(enc) ⇒ Object

Returns a canonical version of an encoding.
#get_dominant_encoding(str, charset, opts = {}) ⇒ Object

Given a String with (potentially, this depends on Ruby version) an encoding, and a charset from a content-type header (which may be nil), determines the dominant encoding.
#strip_bom(data, charset, opts = {}) ⇒ Object

Helper function; strips a leading BOM (byte order mark) from a String for the UTF-8, UTF-16 and UTF-32 encodings known to get_bom.
#to_utf8(data, charset, opts = {}) ⇒ Object

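Converts data to UTF-8, recursing into Hashes and Arrays. (The generated summary "end ruby 1.8/start ruby > 1.8" appears to be a stray version-branch comment from the source file rather than a description of this method.)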
#transcode(data, input_charset, output_charset, opts = {}) ⇒ Object

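Transcodes data from an input charset to an output charset. (The generated summary "end ruby 1.8/start ruby > 1.8" appears to be a stray version-branch comment from the source file rather than a description of this method.)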

Instance Method Details

#bin_to_hex(data) ⇒ `Object`

Helper function for testing

# File 'lib/faraday_json/encoding.rb', line 211

# Helper function for testing: renders the bytes of +data+ as a single
# lowercase hexadecimal string.
#
# Every byte is written as exactly two hex digits, so the dump can be split
# back into bytes unambiguously (e.g. "\x01\x10" yields "0110").
#
# data - any object; its bytes are dumped if it responds to #each_byte.
#
# Returns the hex String, or +data+ itself (unchanged) when it does not
# respond to #each_byte.
def bin_to_hex(data)
  return data unless data.respond_to?(:each_byte)

  # Integer#to_s(16) alone drops the leading zero of bytes below 0x10, which
  # makes different byte sequences produce the same dump; pad to two digits.
  data.each_byte.map { |byte| byte.to_s(16).rjust(2, '0') }.join
end

#get_bom(enc) ⇒ `Object`

Given a (canonical) encoding, returns a BOM as an array of byte values. If the given encoding does not have a BOM, an empty array is returned.

# File 'lib/faraday_json/encoding.rb', line 192

# Looks up the byte order mark (BOM) for a canonical (lowercase) encoding name.
#
# The name is matched by prefix, with or without the dash after "utf"
# (e.g. both 'utf16be' and 'utf-16be' match). Matching is case-sensitive.
#
# enc - encoding name as a String.
#
# Returns the BOM as an Array of byte values, or an empty Array when the
# encoding matches none of the known prefixes.
def get_bom(enc)
  return [0xfe, 0xff] if enc.start_with?('utf16be', 'utf-16be')
  return [0xff, 0xfe] if enc.start_with?('utf16le', 'utf-16le')
  return [0xef, 0xbb, 0xbf] if enc.start_with?('utf8', 'utf-8')
  return [0x00, 0x00, 0xfe, 0xff] if enc.start_with?('utf32be', 'utf-32be')
  return [0xff, 0xfe, 0x00, 0x00] if enc.start_with?('utf32le', 'utf-32le')

  # Anything else (including bare 'utf-16' / 'utf-32') has no BOM here.
  []
end

#get_canonical_encoding(enc) ⇒ `Object`

Returns a canonical version of an encoding.

# File 'lib/faraday_json/encoding.rb', line 178

def get_canonical_encoding(enc)
  if defined? ::Encoding and ::Encoding.respond_to? :find
    # Oh... Ruby 1.9.2 doesn't like passing an Encoding to find()...
    if not enc.is_a? ::Encoding
      enc = ::Encoding.find(enc)
    end
    return enc.to_s.downcase
  end
  return enc.downcase
end

#get_dominant_encoding(str, charset, opts = {}) ⇒ `Object`

Given a String with (potentially, this depends on Ruby version) an encoding, and a charset from a content-type header (which may be nil), determines the dominant encoding. (Charset, if given, overrides internal encoding, if present).

# File 'lib/faraday_json/encoding.rb', line 155

# Decides which encoding should be trusted for +str+.
#
# Precedence:
#   1. +charset+, when it is neither nil nor empty;
#   2. the value of str.encoding, when +str+ responds to #encoding and that
#      value is not nil;
#   3. opts['default_encoding'].
#
# str     - the data whose encoding is in question.
# charset - charset from a content-type header; may be nil or empty.
# opts    - Hash; only the String key 'default_encoding' is read.
#
# Returns the chosen encoding; depending on which rule applied this is
# +charset+, the value of str.encoding, or the configured default.
# Raises RuntimeError when none of the three sources yields an encoding.
def get_dominant_encoding(str, charset, opts = {})
  # An explicit charset overrides whatever the string claims about itself.
  return charset unless charset.nil? || charset.empty?

  internal = str.respond_to?(:encoding) ? str.encoding : nil
  return internal unless internal.nil?

  fallback = opts.fetch('default_encoding', nil)
  raise "No charset provided, don't know what to do!" if fallback.nil? # FIXME

  fallback
end

#strip_bom(data, charset, opts = {}) ⇒ `Object`

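Helper function; strips a leading BOM (byte order mark) from a String. The BOM is chosen by get_bom for the dominant encoding, so UTF-8, UTF-16BE/LE and UTF-32BE/LE are covered, not only UTF-16. Values that are not Strings are returned unchanged.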

# File 'lib/faraday_json/encoding.rb', line 111

# Helper function; removes a leading byte order mark (BOM) from a String.
#
# The BOM looked for is the one get_bom reports for the dominant encoding
# (the charset, if given, overrides the string's internal encoding).
#
# data    - the value to process; anything that is not a String is returned
#           untouched.
# charset - charset from a content-type header; may be nil.
# opts    - options Hash, passed through to get_dominant_encoding.
#
# Returns +data+ itself when it is not a String or does not start with the
# expected BOM. Otherwise returns a new String holding the bytes after the
# BOM, tagged with the canonical encoding where #force_encoding exists.
# Note: when get_bom returns an empty Array the "match" is trivially true, so
# a re-tagged copy of the full data is returned in that case too.
def strip_bom(data, charset, opts = {})
  # Only Strings can carry a BOM.
  return data unless data.is_a?(String)

  # Resolve the encoding to trust, then normalise its name.
  canonical = get_canonical_encoding(get_dominant_encoding(data, charset, opts))

  # Expected BOM bytes, and the raw bytes we actually have.
  marker = get_bom(canonical)
  bytes = data.each_byte.to_a

  # Data that is too short, or starts with different bytes, has no BOM.
  return data unless bytes[0, marker.length] == marker

  # Rebuild the string from the bytes that follow the BOM.
  stripped = bytes[marker.length..-1].pack('c*')
  stripped.force_encoding(canonical) if stripped.respond_to?(:force_encoding)
  stripped
end

#to_utf8(data, charset, opts = {}) ⇒ `Object`

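Converts data to UTF-8: Strings are transcoded from the given charset to 'UTF-8//IGNORE', Hashes (keys and values) and Arrays are converted recursively into new containers, and any other value is returned unchanged. (The generated description "end ruby 1.8/start ruby > 1.8" appears to be a stray version-branch comment from the source file.)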

# File 'lib/faraday_json/encoding.rb', line 103

# Converts +data+ to UTF-8, descending into containers.
#
# data    - a String, Hash, Array, or any other value.
# charset - the charset the Strings are currently in; handed to transcode.
# opts    - options Hash, handed to transcode unchanged.
#
# Returns:
#   * for a Hash  - a new Hash whose keys and values were both converted;
#   * for an Array - a new Array of converted elements;
#   * for a String - the result of transcode(data, charset, 'UTF-8//IGNORE', opts);
#   * otherwise   - +data+ itself, unchanged.
def to_utf8(data, charset, opts = {})
  case data
  when Hash
    data.inject({}) do |converted, (key, value)|
      converted[to_utf8(key, charset, opts)] = to_utf8(value, charset, opts)
      converted
    end
  when Array
    data.map { |element| to_utf8(element, charset, opts) }
  when String
    transcode(data, charset, 'UTF-8//IGNORE', opts)
  else
    data
  end
end

#transcode(data, input_charset, output_charset, opts = {}) ⇒ `Object`

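Transcodes data from input_charset to output_charset using Iconv; the listing below is the Ruby 1.8 implementation. A nil or empty input charset falls back to opts['default_input_charset'] (default 'us-ascii'), and a nil or empty output charset falls back to opts['default_output_charset'] (default 'UTF-8//IGNORE'). (The generated description "end ruby 1.8/start ruby > 1.8" appears to be a stray version-branch comment from the source file.)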

# File 'lib/faraday_json/encoding.rb', line 39

# Transcodes +data+ from one charset to another using Iconv (Ruby 1.8 path).
#
# On Ruby 1.8 there is little choice but to believe the charsets we are given.
#
# data           - the String to convert.
# input_charset  - charset +data+ is in; when nil or empty,
#                  opts['default_input_charset'] is used (default 'us-ascii').
# output_charset - charset to produce; when nil or empty,
#                  opts['default_output_charset'] is used
#                  (default 'UTF-8//IGNORE').
# opts           - Hash; only the two String keys named above are read.
#
# Returns whatever ::Iconv.conv returns for the resolved charsets.
def transcode(data, input_charset, output_charset, opts = {})
  # Without an input charset we can't do better than US-ASCII.
  source = input_charset
  source = opts.fetch('default_input_charset', 'us-ascii') if source.nil? || source.empty?

  # The output, on the other hand, should default to UTF-8.
  target = output_charset
  target = opts.fetch('default_output_charset', 'UTF-8//IGNORE') if target.nil? || target.empty?

  # Iconv is loaded lazily so it is only required when this path is used.
  require 'iconv'
  ::Iconv.conv(target, source, data)
end

Module: FaradayJSON::Encoding

Overview

Instance Method Summary collapse

Instance Method Details

#bin_to_hex(data) ⇒ Object

#get_bom(enc) ⇒ Object

#get_canonical_encoding(enc) ⇒ Object

#get_dominant_encoding(str, charset, opts = {}) ⇒ Object

#strip_bom(data, charset, opts = {}) ⇒ Object

#to_utf8(data, charset, opts = {}) ⇒ Object