Method: UnicodeNormalize.normalize

Defined in:
lib/unicode_normalize/normalize.rb

.normalize(string, form = :nfc) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/unicode_normalize/normalize.rb', line 120

# Applies the requested Unicode normalization form to +string+.
#
# Dispatch is by the string's encoding:
# * UTF-8: substitutions are performed directly with the module's
#   regexp/lookup-table pairs. The compatibility forms (:nfkc, :nfkd) first
#   substitute via REGEXP_K / KOMPATIBLE_TABLE and then run the same
#   substitution as :nfc / :nfd respectively.
# * US-ASCII: the string is returned as-is (the very same object, not a copy)
#   once +form+ has been validated.
# * Any encoding listed in UNICODE_ENCODINGS: the string is transcoded to
#   UTF-8, normalized, and transcoded back to its original encoding.
#
# @param string [String] the text to normalize
# @param form [Symbol] one of :nfc, :nfd, :nfkc, :nfkd (defaults to :nfc)
# @return [String] the normalized text, in the encoding of +string+
# @raise [ArgumentError] if +form+ is not one of the four supported symbols
# @raise [Encoding::CompatibilityError] if the encoding of +string+ is not
#   UTF-8, US-ASCII, or a member of UNICODE_ENCODINGS
def self.normalize(string, form = :nfc)
  encoding = string.encoding
  case encoding
  when Encoding::UTF_8
    case form
    when :nfc
      string.gsub(REGEXP_C, NF_HASH_C)
    when :nfd
      string.gsub(REGEXP_D, NF_HASH_D)
    when :nfkc
      string.gsub(REGEXP_K, KOMPATIBLE_TABLE).gsub(REGEXP_C, NF_HASH_C)
    when :nfkd
      string.gsub(REGEXP_K, KOMPATIBLE_TABLE).gsub(REGEXP_D, NF_HASH_D)
    else
      raise ArgumentError, "Invalid normalization form #{form}."
    end
  when Encoding::US_ASCII
    # No substitution is performed for US-ASCII input, but the form is still
    # checked so that an invalid form fails the same way regardless of what
    # the string happens to contain.
    case form
    when :nfc, :nfd, :nfkc, :nfkd
      string
    else
      raise ArgumentError, "Invalid normalization form #{form}."
    end
  when *UNICODE_ENCODINGS
    # Round-trip through UTF-8; the recursive call takes the UTF-8 branch,
    # which also validates +form+.
    normalize(string.encode(Encoding::UTF_8), form).encode(encoding)
  else
    raise Encoding::CompatibilityError, "Unicode Normalization not appropriate for #{encoding}"
  end
end