Module: UTF8Proc
- Includes:
- JRuby
- Defined in:
- lib/utf8_proc.rb,
lib/utf8_proc/jruby.rb,
lib/utf8_proc/version.rb,
lib/utf8_proc/benchmark.rb,
lib/utf8_proc/core_ext/string.rb,
lib/utf8_proc/core_ext/string_jruby.rb,
ext/utf8_proc/utf8_proc.c
Overview
Unicode string normalization library using UTF8Proc
Defined Under Namespace
Modules: Benchmark, JRuby, StringExtension
Constant Summary collapse
- VERSION =
The gem version
"0.6.0".freeze
- LIBRARY_VERSION =
The version of the underlying utf8proc library, as a frozen String
rb_str_freeze( rb_enc_str_new(libVersion, strlen(libVersion), enc_utf8) )
Class Method Summary collapse
-
.NFC(string) ⇒ String
(also: nfc)
Normalizes a string using NFC (Canonical Decomposition, followed by Canonical Composition).
-
.NFD(string) ⇒ String
(also: nfd)
Normalizes a string using NFD (Canonical Decomposition).
-
.NFKC(string) ⇒ String
(also: nfkc)
Normalizes a string using NFKC (Compatibility Decomposition, followed by Canonical Composition).
-
.NFKC_CF(string) ⇒ String
(also: nfkc_cf)
Normalizes a string using NFKC (Compatibility Decomposition, followed by Canonical Composition) with case-folding.
-
.NFKD(string) ⇒ String
(also: nfkd)
Normalizes a string using NFKD (Compatibility Decomposition).
-
.normalize(string, form = :nfc) ⇒ String
Normalizes a string according to one of the five supported forms (:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf); defaults to :nfc.
Class Method Details
.NFC(string) ⇒ String Also known as: nfc
Normalizes a string using NFC (Canonical Decomposition, followed by Canonical Composition)
77 78 79 |
# File 'ext/utf8_proc/utf8_proc.c', line 77 static VALUE toNFC(VALUE self, VALUE string) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE); } |
.NFD(string) ⇒ String Also known as: nfd
Normalizes a string using NFD (Canonical Decomposition)
100 101 102 |
# File 'ext/utf8_proc/utf8_proc.c', line 100 static VALUE toNFD(VALUE self, VALUE string) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE); } |
.NFKC(string) ⇒ String Also known as: nfkc
Normalizes a string using NFKC (Compatibility Decomposition, followed by Canonical Composition)
123 124 125 |
# File 'ext/utf8_proc/utf8_proc.c', line 123 static VALUE toNFKC(VALUE self, VALUE string) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT); } |
.NFKC_CF(string) ⇒ String Also known as: nfkc_cf
Normalizes a string using NFKC (Compatibility Decomposition, followed by Canonical Composition) with case-folding
169 170 171 |
# File 'ext/utf8_proc/utf8_proc.c', line 169 static VALUE toNFKC_CF(VALUE self, VALUE string) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD); } |
.NFKD(string) ⇒ String Also known as: nfkd
Normalizes a string using NFKD (Compatibility Decomposition)
146 147 148 |
# File 'ext/utf8_proc/utf8_proc.c', line 146 static VALUE toNFKD(VALUE self, VALUE string) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT); } |
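.normalize(string, form = :nfc) ⇒ String
Normalizes a string according to one of the five supported forms (:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf). When form is omitted, NFC is used. Raises ArgumentError if form is a symbol other than these five.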
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
# File 'ext/utf8_proc/utf8_proc.c', line 195 static VALUE toNorm(int argc, VALUE* argv, VALUE self){ VALUE string; VALUE form; rb_scan_args(argc, argv, "11", &string, &form); if (NIL_P(form)) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE); } ID s_form; s_form = SYM2ID(form); if (s_form == NFC) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE); } else if (s_form == NFD) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE); } else if (s_form == NFKC) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT); } else if (s_form == NFKD) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT); } else if (s_form == NFKC_CF) { return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD); } else { rb_raise(rb_eArgError, "%s", "Second argument must be one of [:nfc (default), :nfd, :nfkc, " \ ":nfkd, :nfkc_cf]"); } } |