Module: UTF8Proc::StringExtension

Defined in:
lib/utf8_proc/core_ext/string.rb,
ext/utf8_proc/utf8_proc.c

Overview

Module containing C core extension methods for the String class.

You can activate this by using:

require "utf8_proc/core_ext/string"

It will load either the C or the Java extension, depending on your Ruby implementation.

Instance Method Summary collapse

Instance Method Details

#NFC ⇒ String Also known as: nfc

Normalizes self using NFC (Canonical Decomposition, followed by Canonical Composition)

Returns:

  • (String)

    a normalized copy of the string

Raises:

  • (EncodingError)

    if self is not encoded in UTF-8 or US-ASCII



88
89
90
# File 'ext/utf8_proc/utf8_proc.c', line 88

/*
 * String#NFC (also exposed as #nfc).
 *
 * Hands the receiver to normInternal with the stable + compose flags, which
 * is the flag set this file uses for NFC (canonical decomposition followed
 * by canonical composition).
 *
 * Per the method's documentation: returns a normalized copy of the string
 * and raises EncodingError if the receiver is not UTF-8 or US-ASCII.
 */
static VALUE StoNFC(VALUE string) {
  const int nfc_flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE;

  return normInternal(&string, nfc_flags);
}

#NFD ⇒ String Also known as: nfd

Normalizes self using NFD (Canonical Decomposition)

Returns:

  • (String)

    a normalized copy of the string

Raises:

  • (EncodingError)

    if self is not encoded in UTF-8 or US-ASCII



110
111
112
# File 'ext/utf8_proc/utf8_proc.c', line 110

/*
 * String#NFD (also exposed as #nfd).
 *
 * Hands the receiver to normInternal with the stable + decompose flags,
 * which is the flag set this file uses for NFD (canonical decomposition).
 *
 * Per the method's documentation: returns a normalized copy of the string
 * and raises EncodingError if the receiver is not UTF-8 or US-ASCII.
 */
static VALUE StoNFD(VALUE string) {
  const int nfd_flags = UTF8PROC_STABLE | UTF8PROC_DECOMPOSE;

  return normInternal(&string, nfd_flags);
}

#NFKC ⇒ String Also known as: nfkc

Normalizes self using NFKC (Compatibility Decomposition, followed by Canonical Composition)

Returns:

  • (String)

    a normalized copy of the string

Raises:

  • (EncodingError)

    if self is not encoded in UTF-8 or US-ASCII



134
135
136
# File 'ext/utf8_proc/utf8_proc.c', line 134

/*
 * String#NFKC (also exposed as #nfkc).
 *
 * Hands the receiver to normInternal with the stable + compose + compat
 * flags, which is the flag set this file uses for NFKC (compatibility
 * decomposition followed by canonical composition).
 *
 * Per the method's documentation: returns a normalized copy of the string
 * and raises EncodingError if the receiver is not UTF-8 or US-ASCII.
 */
static VALUE StoNFKC(VALUE string) {
  const int nfkc_flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT;

  return normInternal(&string, nfkc_flags);
}

#NFKC_CF ⇒ String Also known as: nfkc_cf

Normalizes self using NFKC (Compatibility Decomposition, followed by Canonical Composition) with case-folding

Returns:

  • (String)

    a normalized copy of the string

Raises:

  • (EncodingError)

    if self is not encoded in UTF-8 or US-ASCII



180
181
182
# File 'ext/utf8_proc/utf8_proc.c', line 180

/*
 * String#NFKC_CF (also exposed as #nfkc_cf).
 *
 * Hands the receiver to normInternal with the same flags as NFKC
 * (stable + compose + compat) plus the case-folding flag.
 *
 * Per the method's documentation: returns a normalized copy of the string
 * and raises EncodingError if the receiver is not UTF-8 or US-ASCII.
 */
static VALUE StoNFKC_CF(VALUE string) {
  const int nfkc_flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT;
  const int nfkc_cf_flags = nfkc_flags | UTF8PROC_CASEFOLD;

  return normInternal(&string, nfkc_cf_flags);
}

#NFKD ⇒ String Also known as: nfkd

Normalizes self using NFKD (Compatibility Decomposition)

Returns:

  • (String)

    a normalized copy of the string

Raises:

  • (EncodingError)

    if self is not encoded in UTF-8 or US-ASCII



156
157
158
# File 'ext/utf8_proc/utf8_proc.c', line 156

/*
 * String#NFKD (also exposed as #nfkd).
 *
 * Hands the receiver to normInternal with the stable + decompose + compat
 * flags, which is the flag set this file uses for NFKD (compatibility
 * decomposition).
 *
 * Per the method's documentation: returns a normalized copy of the string
 * and raises EncodingError if the receiver is not UTF-8 or US-ASCII.
 */
static VALUE StoNFKD(VALUE string) {
  const int nfkd_flags = UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT;

  return normInternal(&string, nfkd_flags);
}

#normalize(form = :nfc) ⇒ String

Normalizes self according to one of the 5 possible forms

Parameters:

  • form (:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf) (defaults to: :nfc)

    the normalization form

Returns:

  • (String)

    a normalized copy of the string

Raises:

  • (EncodingError)

    if self is not encoded in UTF-8 or US-ASCII

  • (ArgumentError)

    if form is not one of the 5 valid forms



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'ext/utf8_proc/utf8_proc.c', line 233

/*
 * String#normalize(form = :nfc)
 *
 * Normalizes the receiver according to the requested form and returns the
 * result of normInternal.
 *
 * argc/argv carry at most one optional argument, the form. When it is
 * omitted (or nil) the NFC flag set is used. Otherwise the form's ID is
 * compared against the NFC, NFD, NFKC, NFKD and NFKC_CF IDs defined outside
 * this function and the matching flag set is selected.
 *
 * Raises ArgumentError when the form matches none of the five known IDs.
 * Per the method's documentation, EncodingError is raised if the receiver
 * is not UTF-8 or US-ASCII.
 *
 * NOTE(review): form is passed to SYM2ID without a Symbol type check --
 * confirm how non-Symbol arguments are expected to be reported.
 */
static VALUE StoNorm(int argc, VALUE* argv, VALUE string){
  VALUE form;
  rb_scan_args(argc, argv, "01", &form);

  /* Every form shares the stable flag; the remaining bits depend on form. */
  int flags = UTF8PROC_STABLE;

  /* No form given: behave like :nfc without touching SYM2ID. */
  if (NIL_P(form)) {
    flags |= UTF8PROC_COMPOSE;
    return normInternal(&string, flags);
  }

  const ID requested = SYM2ID(form);

  if (requested == NFC) {
    flags |= UTF8PROC_COMPOSE;
  } else if (requested == NFD) {
    flags |= UTF8PROC_DECOMPOSE;
  } else if (requested == NFKC) {
    flags |= UTF8PROC_COMPOSE | UTF8PROC_COMPAT;
  } else if (requested == NFKD) {
    flags |= UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT;
  } else if (requested == NFKC_CF) {
    flags |= UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD;
  } else {
    /* rb_raise does not return, so no normalization is attempted here. */
    rb_raise(rb_eArgError, "%s",
             "Argument must be one of [:nfc (default), :nfd, :nfkc, "
             ":nfkd, :nfkc_cf]");
  }

  return normInternal(&string, flags);
}