120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
|
# File 'lib/unicode_normalize/normalize.rb', line 120
# Returns +string+ normalized to the Unicode normalization form +form+
# (:nfc by default), choosing a strategy from the string's encoding.
#
# * UTF-8: substitutes via the table matching +form+; the compatibility
#   forms (:nfkc, :nfkd) first apply the REGEXP_K / KOMPATIBLE_TABLE pass
#   and then the composing (C) or decomposing (D) pass.
# * US-ASCII: the very same object is handed back and +form+ is not
#   inspected on this path.
# * Any encoding matched by UNICODE_ENCODINGS: the string is transcoded to
#   UTF-8, normalized by a recursive call, and transcoded back.
#
# Raises ArgumentError when a UTF-8 string is given an unknown +form+, and
# Encoding::CompatibilityError when the encoding matches none of the above.
def self.normalize(string, form = :nfc)
  source_encoding = string.encoding

  case source_encoding
  when Encoding::UTF_8
    case form
    when :nfc
      string.gsub(REGEXP_C, NF_HASH_C)
    when :nfd
      string.gsub(REGEXP_D, NF_HASH_D)
    when :nfkc
      compat_mapped = string.gsub(REGEXP_K, KOMPATIBLE_TABLE)
      compat_mapped.gsub(REGEXP_C, NF_HASH_C)
    when :nfkd
      compat_mapped = string.gsub(REGEXP_K, KOMPATIBLE_TABLE)
      compat_mapped.gsub(REGEXP_D, NF_HASH_D)
    else
      raise ArgumentError, "Invalid normalization form #{form}."
    end
  when Encoding::US_ASCII
    string
  when *UNICODE_ENCODINGS
    # Round-trip through UTF-8 so the UTF-8 branch above does the real work.
    utf8_result = normalize(string.encode(Encoding::UTF_8), form)
    utf8_result.encode(source_encoding)
  else
    raise Encoding::CompatibilityError, "Unicode Normalization not appropriate for #{source_encoding}"
  end
end
|