Module: UTF8Proc
- Includes:
- JRuby
- Defined in:
- lib/utf8_proc.rb,
lib/utf8_proc/jruby.rb,
lib/utf8_proc/version.rb,
lib/utf8_proc/benchmark.rb,
lib/utf8_proc/core_ext/string.rb,
lib/utf8_proc/core_ext/string_jruby.rb,
ext/utf8_proc/utf8_proc.c
Overview
Unicode string normalization library using UTF8Proc
Defined Under Namespace
Modules: Benchmark, JRuby, StringExtension
Constant Summary collapse
- VERSION =
The gem version
"0.6.0".freeze
- LIBRARY_VERSION =
The version of the underlying utf8proc C library, as a frozen UTF-8 string
rb_str_freeze( rb_enc_str_new(libVersion, strlen(libVersion), enc_utf8) )
Class Method Summary collapse
-
.NFC(string) ⇒ String
(also: nfc)
Normalizes a String using NFC (Canonical Decomposition, followed by Canonical Composition).
-
.NFD(string) ⇒ String
(also: nfd)
Normalizes a string using NFD (Canonical Decomposition).
-
.NFKC(string) ⇒ String
(also: nfkc)
Normalizes a string using NFKC (Compatibility Decomposition, followed by Canonical Composition).
-
.NFKC_CF(string) ⇒ String
(also: nfkc_cf)
Normalizes a string using NFKC (Compatibility Decomposition, followed by Canonical Composition) with case-folding.
-
.NFKD(string) ⇒ String
(also: nfkd)
Normalizes a string using NFKD (Compatibility Decomposition).
-
.normalize(string, form = :nfc) ⇒ String
Normalizes a string according to one of the five supported forms (:nfc, :nfd, :nfkc, :nfkd or :nfkc_cf); the form defaults to :nfc.
Class Method Details
.NFC(string) ⇒ String Also known as: nfc
Normalizes a String using NFC (Canonical Decomposition, followed by Canonical Composition)
77 78 79 |
# File 'ext/utf8_proc/utf8_proc.c', line 77
/*
 * Backs UTF8Proc.NFC(string) / UTF8Proc.nfc(string).
 *
 * Hands the string to normInternal together with the STABLE and COMPOSE
 * option flags and returns whatever normInternal produces. `self` is unused.
 */
static VALUE toNFC(VALUE self, VALUE string) {
  const int flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE;

  return normInternal(&string, flags);
}
|
.NFD(string) ⇒ String Also known as: nfd
Normalizes a string using NFD (Canonical Decomposition)
100 101 102 |
# File 'ext/utf8_proc/utf8_proc.c', line 100
/*
 * Backs UTF8Proc.NFD(string) / UTF8Proc.nfd(string).
 *
 * Hands the string to normInternal together with the STABLE and DECOMPOSE
 * option flags and returns whatever normInternal produces. `self` is unused.
 */
static VALUE toNFD(VALUE self, VALUE string) {
  const int flags = UTF8PROC_STABLE | UTF8PROC_DECOMPOSE;

  return normInternal(&string, flags);
}
|
.NFKC(string) ⇒ String Also known as: nfkc
Normalizes a string using NFKC (Compatibility Decomposition, followed by Canonical Composition)
123 124 125 |
# File 'ext/utf8_proc/utf8_proc.c', line 123
/*
 * Backs UTF8Proc.NFKC(string) / UTF8Proc.nfkc(string).
 *
 * Hands the string to normInternal together with the STABLE, COMPOSE and
 * COMPAT option flags and returns whatever normInternal produces.
 * `self` is unused.
 */
static VALUE toNFKC(VALUE self, VALUE string) {
  const int flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT;

  return normInternal(&string, flags);
}
|
.NFKC_CF(string) ⇒ String Also known as: nfkc_cf
Normalizes a string using NFKC (Compatibility Decomposition, followed by Canonical Composition) with case-folding
169 170 171 |
# File 'ext/utf8_proc/utf8_proc.c', line 169
/*
 * Backs UTF8Proc.NFKC_CF(string) / UTF8Proc.nfkc_cf(string).
 *
 * Same flag set as NFKC (STABLE, COMPOSE, COMPAT) with CASEFOLD added;
 * the string and flags are passed to normInternal and its result is
 * returned. `self` is unused.
 */
static VALUE toNFKC_CF(VALUE self, VALUE string) {
  const int flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT
                    | UTF8PROC_CASEFOLD;

  return normInternal(&string, flags);
}
|
.NFKD(string) ⇒ String Also known as: nfkd
Normalizes a string using NFKD (Compatibility Decomposition)
146 147 148 |
# File 'ext/utf8_proc/utf8_proc.c', line 146
/*
 * Backs UTF8Proc.NFKD(string) / UTF8Proc.nfkd(string).
 *
 * Hands the string to normInternal together with the STABLE, DECOMPOSE and
 * COMPAT option flags and returns whatever normInternal produces.
 * `self` is unused.
 */
static VALUE toNFKD(VALUE self, VALUE string) {
  const int flags = UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT;

  return normInternal(&string, flags);
}
|
.normalize(string, form = :nfc) ⇒ String
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
# File 'ext/utf8_proc/utf8_proc.c', line 195
/*
 * Backs UTF8Proc.normalize(string, form = :nfc).
 *
 * Takes one required argument (the string) and one optional argument (the
 * form). A nil/omitted form selects the NFC flags. Otherwise the form is
 * converted with SYM2ID and compared against the NFC, NFD, NFKC, NFKD and
 * NFKC_CF IDs; anything else raises ArgumentError. The chosen flags are
 * passed to normInternal and its result is returned. `self` is unused.
 */
static VALUE toNorm(int argc, VALUE* argv, VALUE self){
  VALUE string;
  VALUE form;
  /* The NFC flag set doubles as the default when no form is supplied. */
  int flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE;

  rb_scan_args(argc, argv, "11", &string, &form);

  /* Only look at the form when one was given; nil keeps the default. */
  if (!NIL_P(form)) {
    const ID wanted = SYM2ID(form);

    if (wanted == NFC) {
      flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE;
    } else if (wanted == NFD) {
      flags = UTF8PROC_STABLE | UTF8PROC_DECOMPOSE;
    } else if (wanted == NFKC) {
      flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT;
    } else if (wanted == NFKD) {
      flags = UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT;
    } else if (wanted == NFKC_CF) {
      flags = UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT
              | UTF8PROC_CASEFOLD;
    } else {
      rb_raise(rb_eArgError, "%s",
               "Second argument must be one of [:nfc (default), :nfd, :nfkc, " \
               ":nfkd, :nfkc_cf]");
    }
  }

  return normInternal(&string, flags);
}
|