56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
# File 'lib/string_cleaner.rb', line 56
# Returns a copy of the receiver with invisible characters normalised.
#
# Steps, in order:
# 1. Everything matched by the ZERO_WIDTH alternatives is removed.
# 2. On every line, whitespace and Unicode "Other" (\p{C}) characters are
#    replaced by a plain space; the "\n" line separators are kept.
# 3. Everything matched by the SPECIAL_SPACES alternatives is replaced by a
#    plain space.
#
# The receiver itself is never modified; all work happens on a duplicate.
#
# @return [String] the cleaned copy
def fix_invisible_chars
  utf8 = dup
  utf8.gsub!(Regexp.new(ZERO_WIDTH.join("|")), "")

  # Sentinel: String#split drops trailing empty fields, so without it any
  # trailing newlines would be lost. It is appended for BOTH branches so the
  # final chop! always removes the sentinel and never a real character
  # (and never returns nil on empty input).
  utf8 << " "

  lines = if utf8.respond_to?(:force_encoding)
    # Encoding-aware Ruby: the built-in regexp engine understands \p{C}.
    utf8.split(/\n/u).each { |line| line.gsub!(/[\s\p{C}]/u, " ") }
  else
    # Older Ruby without encoding-aware strings: lazy-load Oniguruma for \p{C}.
    require "oniguruma"
    control_chars = Oniguruma::ORegexp.new("[\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8})
    utf8.split(/\n/n).collect { |line| control_chars.gsub(line, " ") }
  end

  utf8 = lines.join("\n")
  utf8.chop! # drop the sentinel added above

  # NOTE: "\s" inside a double-quoted string is a literal space, so the last
  # alternative only maps a space onto itself.
  utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|") + "|\s"), " ")
  utf8
end
|