Module: Corefines::String::ForceUTF8

Defined in:
lib/corefines/string.rb

Instance Method Summary collapse

Instance Method Details

#force_utf8 ⇒ String

Returns a copy of str with encoding changed to UTF-8 and all invalid byte sequences replaced with the Unicode Replacement Character (U+FFFD).

If str responds to #scrub! (Ruby >=2.1), then it's used for replacing invalid bytes. Otherwise a simple custom implementation is used (may not return the same result as #scrub!).


207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/corefines/string.rb', line 207

# Refinement that adds UTF-8 coercion helpers to String.
module ForceUTF8
  refine ::String do
    # Non-destructive variant: works on a duplicate so the receiver is
    # left untouched.
    #
    # @return [String] a UTF-8 copy with invalid bytes replaced by U+FFFD.
    def force_utf8
      copy = dup
      copy.force_utf8!
    end

    # Re-tags the receiver as UTF-8 and replaces invalid byte sequences
    # with U+FFFD, in place.
    #
    # @return [String] the receiver itself.
    def force_utf8!
      force_encoding(Encoding::UTF_8)

      # Prefer the built-in scrubber whenever the receiver offers one.
      return scrub! if respond_to?(:scrub!)

      # Fallback: rebuild the string character by character, emitting a
      # single replacement character for each run of invalid characters.
      buffer = ''.force_encoding('BINARY')
      in_bad_run = false

      each_char do |char|
        if char.valid_encoding?
          buffer << char
          in_bad_run = false
        else
          buffer << "\uFFFD" unless in_bad_run
          in_bad_run = true
        end
      end

      replace(buffer.force_encoding(Encoding::UTF_8))
    end
  end
end

#force_utf8! ⇒ String

Changes the encoding to UTF-8, replaces all invalid byte sequences with the Unicode Replacement Character (U+FFFD) and returns self. This is the same as #force_utf8, except that it modifies the receiver in place.


207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/corefines/string.rb', line 207

# Refinement that adds UTF-8 coercion helpers to String.
module ForceUTF8
  refine ::String do
    # Non-destructive variant: works on a duplicate so the receiver is
    # left untouched.
    #
    # @return [String] a UTF-8 copy with invalid bytes replaced by U+FFFD.
    def force_utf8
      copy = dup
      copy.force_utf8!
    end

    # Re-tags the receiver as UTF-8 and replaces invalid byte sequences
    # with U+FFFD, in place.
    #
    # @return [String] the receiver itself.
    def force_utf8!
      force_encoding(Encoding::UTF_8)

      # Prefer the built-in scrubber whenever the receiver offers one.
      return scrub! if respond_to?(:scrub!)

      # Fallback: rebuild the string character by character, emitting a
      # single replacement character for each run of invalid characters.
      buffer = ''.force_encoding('BINARY')
      in_bad_run = false

      each_char do |char|
        if char.valid_encoding?
          buffer << char
          in_bad_run = false
        else
          buffer << "\uFFFD" unless in_bad_run
          in_bad_run = true
        end
      end

      replace(buffer.force_encoding(Encoding::UTF_8))
    end
  end
end