Method: String::Cleaner#fix_invisible_chars

Defined in:
lib/string_cleaner.rb

#fix_invisible_chars ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/string_cleaner.rb', line 56

# Returns a cleaned copy of the string in which invisible characters have
# been removed or turned into plain spaces. The receiver is left untouched.
#
# The clean-up happens in three passes:
# 1. every match of a pattern listed in ZERO_WIDTH is deleted;
# 2. line by line (lines are delimited by "\n", which is kept), every
#    character in the Unicode "Other" category (\p{C}) -- and, on
#    interpreters with String#force_encoding, every whitespace character --
#    is replaced by a single space;
# 3. every match of a pattern listed in SPECIAL_SPACES is replaced by a
#    single space.
#
# @return [String] a new string; never nil, even for empty input
def fix_invisible_chars
  utf8 = dup
  utf8.gsub!(Regexp.new(ZERO_WIDTH.join("|")), "")

  # Sentinel: String#split discards trailing empty fields, so without a
  # non-newline last character any trailing "\n" would be lost. The sentinel
  # is appended for BOTH branches and removed again by the chop below, so the
  # last real character of the input is never chopped off.
  utf8 << " "

  lines =
    if utf8.respond_to?(:force_encoding)
      utf8.split(/\n/u).each { |line| line.gsub!(/[\s\p{C}]/u, " ") }
    else
      # Interpreters without String#force_encoding: fall back to the
      # oniguruma gem, loaded lazily so it is only needed on this path.
      require "oniguruma"
      # Built once rather than once per line.
      other_chars = Oniguruma::ORegexp.new("[\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8})
      utf8.split(/\n/n).collect { |line| other_chars.gsub(line, " ") }
    end

  # Non-bang chop always returns a String (chop! may return nil).
  utf8 = lines.join("\n").chop

  # NOTE: "\s" inside a double-quoted string is a literal space, not the
  # regexp whitespace class, so newlines are not affected by this pass.
  utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|") + "|\s"), " ")
  utf8
end