Method: Glib.utf8_normalize

Defined in:
ext/glib/glib.c

.utf8_normalize(string, form) ⇒ Object

Returns the normalized form of the string. See www.unicode.org/reports/tr15/tr15-29.html for more information about normalization.

form can be one of the following: :c, :kc, :d, or :kd.

decomposed = [101, 769].pack('U*')
composed = Glib.utf8_normalize(decomposed, :kc)
composed.unpack('U*') #=> [233]


96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'ext/glib/glib.c', line 96

/*
 * Returns a new Ruby String holding the Unicode-normalized form of +string+.
 *
 * string - Ruby String to normalize; GLib requires it to be valid UTF-8.
 * form   - Symbol selecting the normalization form: :d, :c, :kd or :kc.
 *
 * Raises TypeError (through Check_Type) when +string+ is not a String or
 * +form+ is not a Symbol. Raises ArgumentError when +form+ is not one of the
 * four accepted symbols, or when GLib cannot normalize the input.
 */
static VALUE utf8_normalize(VALUE self, VALUE string, VALUE form)
{
  VALUE result;
  gchar *temp;
  GNormalizeMode mode;

  Check_Type(string, T_STRING);
  Check_Type(form, T_SYMBOL);

  /* Map the Ruby symbol onto the matching GLib normalization mode. */
  if (ID2SYM(rb_intern("d")) == form) {
    mode = G_NORMALIZE_NFD;
  } else if (ID2SYM(rb_intern("c")) == form) {
    mode = G_NORMALIZE_NFC;
  } else if (ID2SYM(rb_intern("kd")) == form) {
    mode = G_NORMALIZE_NFKD;
  } else if (ID2SYM(rb_intern("kc")) == form) {
    mode = G_NORMALIZE_NFKC;
  } else {
    rb_raise(rb_eArgError, "%s is not a valid normalization form, options are: :d, :kd, :c, or :kc", RSTRING(rb_inspect(form))->ptr);
  }

  temp = g_utf8_normalize(StringValuePtr(string), RSTRING(string)->len, mode);

  /* g_utf8_normalize returns NULL when the input is not valid UTF-8; report
   * that explicitly rather than handing a NULL pointer to rb_str_new2. */
  if (temp == NULL) {
    rb_raise(rb_eArgError, "string could not be normalized, it is not valid UTF-8");
  }

  /* rb_str_new2 copies the bytes into a Ruby-owned string, so the buffer
   * allocated by GLib must be released here to avoid leaking it. */
  result = rb_str_new2(temp);
  g_free(temp);

  return result;
}