Method: String::Cleaner#fix_encoding

Defined in:
lib/string_cleaner.rb

#fix_encoding ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/string_cleaner.rb', line 10

# Returns a UTF-8 rendition of the receiver. All work is done on a copy, so
# the receiver itself is never modified.
#
# When the copy responds to +force_encoding+, its bytes are first
# reinterpreted as UTF-8. If that does not yield valid UTF-8, the bytes are
# treated as ISO-8859-1 instead and transcoded to UTF-8. Finally every U+0080
# character and every currency sign ("¤") is removed.
#
# Otherwise the conversion is delegated to Iconv, which is required lazily
# because only this fallback needs it.
def fix_encoding
  text = dup

  if text.respond_to?(:force_encoding)
    text.force_encoding("UTF-8")
    unless text.valid_encoding?
      # Not valid UTF-8: read the same bytes as ISO-8859-1 and transcode them.
      text.force_encoding("ISO8859-1").encode!(
        "UTF-8", :invalid => :replace, :undef => :replace, :replace => ""
      )
    end
    # Byte 0x80 is the euro sign in Windows-1252; read as ISO-8859-1 it ends
    # up as U+0080, which is dropped here together with "¤".
    text.gsub!(/\u0080|¤/, "")
    return text
  end

  # Fallback for strings without +force_encoding+.
  require "iconv"
  # A single space is appended before converting and is not stripped here.
  # NOTE(review): presumably this lets Iconv reject a truncated trailing
  # sequence - confirm.
  text << " "
  begin
    # UTF-8 -> UTF-8 conversion; raises when the input cannot be converted.
    Iconv.new("UTF-8", "UTF-8").iconv(text)
  rescue
    # Swap byte 0x80 for 0xA4 ("¤" in ISO-8859-1), convert from ISO-8859-1,
    # then remove every "¤" from the result.
    text.gsub!(/\x80/n, "\xA4")
    Iconv.new("UTF-8//IGNORE", "ISO8859-1").iconv(text).gsub("¤", "")
  end
end