Module: FaradayJSON::Encoding

Included in:
EncodeJson, ParseJson
Defined in:
lib/faraday_json/encoding.rb

Overview

Character encoding helper functions

Instance Method Summary collapse

Instance Method Details

#bin_to_hex(data) ⇒ Object

Helper function for testing



211
212
213
214
215
216
# File 'lib/faraday_json/encoding.rb', line 211

# Helper function for testing: renders the bytes of +data+ as a lowercase
# hexadecimal string.
#
# Every byte is written as exactly two hex digits. Without the zero padding,
# bytes below 0x10 would produce a single digit and distinct inputs (e.g.
# "\x01\x10" and "\x11\x00") would map to the same output.
#
# Objects that do not respond to +each_byte+ are returned unchanged.
def bin_to_hex(data)
  return data unless data.respond_to? :each_byte

  data.each_byte.map { |b| '%02x' % b }.join
end

#get_bom(enc) ⇒ Object

Given a (canonical) encoding, returns a BOM as an array of byte values. If the given encoding does not have a BOM, an empty array is returned.



192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/faraday_json/encoding.rb', line 192

# Looks up the byte order mark for a (canonical, lowercase) encoding name.
#
# The name is matched by prefix against the known UTF-8/16/32 spellings,
# with or without a hyphen. Returns the BOM as an array of byte values, or
# an empty array when no prefix matches (this includes plain "utf-16" and
# "utf-32" without an explicit byte order).
def get_bom(enc)
  # Built per call so callers always receive a fresh array.
  known_boms = [
    [%w[utf16be utf-16be], [0xfe, 0xff]],
    [%w[utf16le utf-16le], [0xff, 0xfe]],
    [%w[utf8 utf-8],       [0xef, 0xbb, 0xbf]],
    [%w[utf32be utf-32be], [0x00, 0x00, 0xfe, 0xff]],
    [%w[utf32le utf-32le], [0xff, 0xfe, 0x00, 0x00]]
  ]

  entry = known_boms.find { |prefixes, _bytes| enc.start_with?(*prefixes) }
  entry ? entry.last : []
end

#get_canonical_encoding(enc) ⇒ Object

Returns a canonical version of an encoding.



178
179
180
181
182
183
184
185
186
187
# File 'lib/faraday_json/encoding.rb', line 178

# Normalizes an encoding (a name or an ::Encoding object) to a lowercase
# canonical name.
#
# When Ruby's ::Encoding class is available, names are resolved through
# ::Encoding.find so that aliases collapse to a single spelling. Without it,
# the given name is simply downcased.
def get_canonical_encoding(enc)
  return enc.downcase unless defined?(::Encoding) && ::Encoding.respond_to?(:find)

  # Ruby 1.9.2 does not accept an Encoding object in find(), so only look up
  # values that are not already Encoding instances.
  resolved = enc.is_a?(::Encoding) ? enc : ::Encoding.find(enc)
  resolved.to_s.downcase
end

#get_dominant_encoding(str, charset, opts = {}) ⇒ Object

Given a String with (potentially, this depends on Ruby version) an encoding, and a charset from a content-type header (which may be nil), determines the dominant encoding. (Charset, if given, overrides internal encoding, if present).



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/faraday_json/encoding.rb', line 155

# Decides which encoding applies to +str+.
#
# Precedence:
# 1. a non-empty +charset+ (e.g. from a content-type header);
# 2. the encoding reported by +str+ itself, if it responds to +encoding+;
# 3. opts['default_encoding'].
#
# Raises a RuntimeError when none of these yields a value.
def get_dominant_encoding(str, charset, opts = {})
  return charset unless charset.nil? || charset.empty?

  internal = str.respond_to?(:encoding) ? str.encoding : nil
  return internal unless internal.nil?

  fallback = opts.fetch('default_encoding', nil)
  raise "No charset provided, don't know what to do!" if fallback.nil? # FIXME

  fallback
end

#strip_bom(data, charset, opts = {}) ⇒ Object

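Helper function; strips a leading BOM (byte order mark) from String data for UTF-8, UTF-16 and UTF-32 encodings. Non-String data is returned unchanged.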



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/faraday_json/encoding.rb', line 111

# Removes a leading byte order mark from +data+.
#
# Non-String values are returned untouched. For Strings, the effective
# encoding is the given +charset+ if present, otherwise the string's own
# encoding (see get_dominant_encoding). When the bytes of +data+ start with
# the BOM expected for that encoding, a new string without the BOM is
# returned, tagged with the canonical encoding where force_encoding exists.
# Otherwise the original +data+ object is returned.
#
# Note that an encoding without a BOM yields an empty BOM, which trivially
# matches; in that case the result is a re-packed copy of +data+ tagged with
# the canonical encoding.
def strip_bom(data, charset, opts = {})
  return data unless data.is_a? String

  # A given charset overrides the string's internal encoding; the result is
  # then made canonical so the BOM lookup sees a predictable name.
  canonical = get_canonical_encoding(get_dominant_encoding(data, charset, opts))
  expected = get_bom(canonical)

  # Work on raw byte values rather than characters.
  bytes = data.each_byte.to_a

  # No BOM at the start: hand back the input as-is.
  return data unless bytes.first(expected.length) == expected

  stripped = bytes[expected.length..-1].pack('c*')
  stripped.force_encoding(canonical) if stripped.respond_to? :force_encoding
  stripped
end

#to_utf8(data, charset, opts = {}) ⇒ Object

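Transcodes data to UTF-8. Hashes (keys and values) and Arrays are converted recursively, Strings are passed to #transcode, and any other value is returned unchanged. (The original description here, "end ruby 1.8/start ruby > 1.8", is a stray source comment marking the boundary between the Ruby 1.8 and Ruby > 1.8 implementations.)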



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/faraday_json/encoding.rb', line 103

# Recursively converts +data+ to UTF-8.
#
# * Hash   - returns a new Hash with every key and value converted.
# * Array  - returns a new Array with every element converted.
# * String - transcoded from +charset+ via transcode.
# * other  - returned unchanged.
#
# +charset+ and +opts+ are passed through to transcode as-is.
def to_utf8(data, charset, opts = {})
  case data
  when Hash
    data.inject({}) do |converted, (key, value)|
      converted[to_utf8(key, charset, opts)] = to_utf8(value, charset, opts)
      converted
    end
  when Array
    data.map { |element| to_utf8(element, charset, opts) }
  when String
    transcode(data, charset, 'UTF-8//IGNORE', opts)
  else
    data
  end
end

#transcode(data, input_charset, output_charset, opts = {}) ⇒ Object

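Transcodes data from input_charset to output_charset using Iconv (the Ruby 1.8 implementation). A nil or empty input charset falls back to opts['default_input_charset'] (default 'us-ascii'); a nil or empty output charset falls back to opts['default_output_charset'] (default 'UTF-8//IGNORE'). (The original description here, "end ruby 1.8/start ruby > 1.8", is a stray source comment marking the boundary between the Ruby 1.8 and Ruby > 1.8 implementations.)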



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/faraday_json/encoding.rb', line 39

# Converts +data+ from +input_charset+ to +output_charset+ using Iconv.
#
# In Ruby 1.8 the given charsets have to be taken at face value; there is
# nothing on the string itself to check them against.
#
# Defaults applied when a charset is nil or empty:
# * input  - opts['default_input_charset'], else 'us-ascii' (without any
#   information we cannot assume more than US-ASCII);
# * output - opts['default_output_charset'], else 'UTF-8//IGNORE'.
#
# Returns whatever ::Iconv.conv returns for the resolved charsets.
def transcode(data, input_charset, output_charset, opts = {})
  source = input_charset
  source = opts.fetch('default_input_charset', 'us-ascii') if source.nil? || source.empty?

  target = output_charset
  target = opts.fetch('default_output_charset', 'UTF-8//IGNORE') if target.nil? || target.empty?

  # Loaded lazily so iconv is only required when transcoding actually happens.
  require 'iconv'
  ::Iconv.conv(target, source, data)
end