Module: Unicode

Defined in:: ext/unicode/unicode.c

Constant Summary collapse

VERSION =

rb_str_new2(UNICODE_VERSION)

Class Method Summary collapse

.abbr_categories(str) ⇒ Object

Returns the result of get_categories_internal() for str, using the abbreviated category names (catname_abbr).
.capitalize(str) ⇒ Object
.categories(str) ⇒ Object
.compose(str) ⇒ Object
.decompose(str) ⇒ Object
.decompose_compat(str) ⇒ Object
.decompose_safe(str) ⇒ Object
.downcase(str) ⇒ Object
.nfc(str) ⇒ Object
.nfc_safe(str) ⇒ Object
.nfd(str) ⇒ Object

Alias of .decompose and .normalize_D (all three are backed by the same C function, unicode_decompose).
.nfd_safe(str) ⇒ Object
.nfkc(str) ⇒ Object
.nfkd(str) ⇒ Object
.normalize_C(str) ⇒ Object
.normalize_C_safe(str) ⇒ Object
.normalize_D(str) ⇒ Object
.normalize_D_safe(str) ⇒ Object
.normalize_KC(str) ⇒ Object
.normalize_KD(str) ⇒ Object
.strcmp(str1, str2) ⇒ Object
.strcmp_compat(str1, str2) ⇒ Object
.text_elements(str) ⇒ Object
.upcase(str) ⇒ Object
.width(*args) ⇒ Object

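Returns the display width of str, or -1 if str contains a control character. An optional second argument, when truthy, makes East Asian Ambiguous characters count as two columns.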

Class Method Details

.abbr_categories(str) ⇒ `Object`

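Returns the result of get_categories_internal() for str, using the abbreviated category names (catname_abbr). The temporary wide string is handed to get_categories_ensure() via rb_ensure().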

# File 'ext/unicode/unicode.c', line 1045

/*
 * Unicode.abbr_categories(str)
 *
 * Runs get_categories_internal() over str with the abbreviated category
 * name table (catname_abbr) and returns whatever that callback returns.
 * Check_Type() rejects non-String arguments.
 *
 * The wide-string buffer is passed to get_categories_ensure() through
 * rb_ensure(), so that callback runs even if the body raises; presumably
 * it is what releases the buffer -- confirm against its definition.
 *
 * NOTE(review): args is initialised with str before CONVERT_TO_UTF8()
 * runs; if that macro reassigns str, args keeps the original object.
 * This looks intentional -- confirm.
 */
VALUE
unicode_get_abbr_categories(VALUE obj, VALUE str)
{
  WString chars;
  get_categories_param args = { &chars, str, catname_abbr };
  VALUE categories;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif
  WStr_allocWithUTF8L(&chars, RSTRING_PTR(str), RSTRING_LEN(str));

  /* chars is handed to get_categories_ensure() on every exit path. */
  categories = rb_ensure(get_categories_internal, (VALUE)&args,
                         get_categories_ensure, (VALUE)&chars);
  return categories;
}

.capitalize(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 964

/*
 * Unicode.capitalize(str)
 *
 * Feeds str through capitalize_internal() and returns the outcome as a
 * new Ruby string. Check_Type() rejects non-String arguments. The result
 * is not passed through sort_canonical().
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_capitalize(VALUE obj, VALUE str)
{
  WString src;
  WString cased;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&cased);
  capitalize_internal(&src, &cased);
  WStr_free(&src);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&cased, &bytes);
  WStr_free(&cased);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.categories(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 1027

/*
 * Unicode.categories(str)
 *
 * Runs get_categories_internal() over str with the long category name
 * table (catname_long) and returns whatever that callback returns.
 * Check_Type() rejects non-String arguments.
 *
 * The wide-string buffer is passed to get_categories_ensure() through
 * rb_ensure(), so that callback runs even if the body raises; presumably
 * it is what releases the buffer -- confirm against its definition.
 *
 * NOTE(review): args is initialised with str before CONVERT_TO_UTF8()
 * runs; if that macro reassigns str, args keeps the original object.
 * This looks intentional -- confirm.
 */
VALUE
unicode_get_categories(VALUE obj, VALUE str)
{
  WString chars;
  get_categories_param args = { &chars, str, catname_long };
  VALUE categories;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif
  WStr_allocWithUTF8L(&chars, RSTRING_PTR(str), RSTRING_LEN(str));

  /* chars is handed to get_categories_ensure() on every exit path. */
  categories = rb_ensure(get_categories_internal, (VALUE)&args,
                         get_categories_ensure, (VALUE)&chars);
  return categories;
}

.compose(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 791

/*
 * Unicode.compose(str)
 *
 * Applies sort_canonical() to the input, feeds it through
 * compose_internal(), and returns the outcome as a new Ruby string.
 * Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_compose(VALUE obj, VALUE str)
{
  WString src;
  WString composed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* The input is reordered in place before composition. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  sort_canonical(&src);
  WStr_alloc(&composed);
  compose_internal(&src, &composed);
  WStr_free(&src);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.decompose(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 713

/*
 * Unicode.decompose(str)
 *
 * Feeds str through decompose_internal(), applies sort_canonical() to
 * the output, and returns the outcome as a new Ruby string.
 * Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_decompose(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.decompose_compat(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 765

/*
 * Unicode.decompose_compat(str)
 *
 * Feeds str through decompose_compat_internal(), applies
 * sort_canonical() to the output, and returns the outcome as a new Ruby
 * string. Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_decompose_compat(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.decompose_safe(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 739

/*
 * Unicode.decompose_safe(str)
 *
 * Feeds str through decompose_safe_internal(), applies sort_canonical()
 * to the output, and returns the outcome as a new Ruby string.
 * Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_decompose_safe(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.downcase(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 933

/*
 * Unicode.downcase(str)
 *
 * Feeds str through downcase_internal() and returns the outcome as a
 * new Ruby string. Check_Type() rejects non-String arguments. The result
 * is not passed through sort_canonical().
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_downcase(VALUE obj, VALUE str)
{
  WString src;
  WString cased;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&cased);
  downcase_internal(&src, &cased);
  WStr_free(&src);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&cased, &bytes);
  WStr_free(&cased);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.nfc(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 817

/*
 * Unicode.normalize_C(str) / Unicode.nfc(str)
 *
 * Two-stage pipeline: decompose_internal() followed by sort_canonical(),
 * then compose_internal() on that intermediate. The final buffer is
 * returned as a new Ruby string. Check_Type() rejects non-String
 * arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_normalize_C(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder; the input buffer is then released. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Stage 2: compose from the reordered intermediate. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.nfc_safe(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 847

/*
 * Unicode.normalize_C_safe(str) / Unicode.nfc_safe(str)
 *
 * Two-stage pipeline: decompose_safe_internal() followed by
 * sort_canonical(), then compose_internal() on that intermediate. The
 * final buffer is returned as a new Ruby string. Check_Type() rejects
 * non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_normalize_safe(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder; the input buffer is then released. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Stage 2: compose from the reordered intermediate. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.nfd(str) ⇒ `Object`

Alias of .decompose and .normalize_D (all three are backed by the same C function, unicode_decompose).

# File 'ext/unicode/unicode.c', line 713

/*
 * Unicode.decompose(str)
 *
 * Feeds str through decompose_internal(), applies sort_canonical() to
 * the output, and returns the outcome as a new Ruby string.
 * Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_decompose(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.nfd_safe(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 739

/*
 * Unicode.decompose_safe(str)
 *
 * Feeds str through decompose_safe_internal(), applies sort_canonical()
 * to the output, and returns the outcome as a new Ruby string.
 * Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_decompose_safe(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.nfkc(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 877

/*
 * Unicode.normalize_KC(str) / Unicode.nfkc(str)
 *
 * Two-stage pipeline: decompose_compat_internal() followed by
 * sort_canonical(), then compose_internal() on that intermediate. The
 * final buffer is returned as a new Ruby string. Check_Type() rejects
 * non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_normalize_KC(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder; the input buffer is then released. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Stage 2: compose from the reordered intermediate. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.nfkd(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 765

/*
 * Unicode.decompose_compat(str)
 *
 * Feeds str through decompose_compat_internal(), applies
 * sort_canonical() to the output, and returns the outcome as a new Ruby
 * string. Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_decompose_compat(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.normalize_C(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 817

/*
 * Unicode.normalize_C(str) / Unicode.nfc(str)
 *
 * Two-stage pipeline: decompose_internal() followed by sort_canonical(),
 * then compose_internal() on that intermediate. The final buffer is
 * returned as a new Ruby string. Check_Type() rejects non-String
 * arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_normalize_C(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder; the input buffer is then released. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Stage 2: compose from the reordered intermediate. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.normalize_C_safe(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 847

/*
 * Unicode.normalize_C_safe(str) / Unicode.nfc_safe(str)
 *
 * Two-stage pipeline: decompose_safe_internal() followed by
 * sort_canonical(), then compose_internal() on that intermediate. The
 * final buffer is returned as a new Ruby string. Check_Type() rejects
 * non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_normalize_safe(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder; the input buffer is then released. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Stage 2: compose from the reordered intermediate. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.normalize_D(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 713

/*
 * Unicode.decompose(str)
 *
 * Feeds str through decompose_internal(), applies sort_canonical() to
 * the output, and returns the outcome as a new Ruby string.
 * Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_decompose(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.normalize_D_safe(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 739

/*
 * Unicode.decompose_safe(str)
 *
 * Feeds str through decompose_safe_internal(), applies sort_canonical()
 * to the output, and returns the outcome as a new Ruby string.
 * Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_decompose_safe(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_safe_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.normalize_KC(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 877

/*
 * Unicode.normalize_KC(str) / Unicode.nfkc(str)
 *
 * Two-stage pipeline: decompose_compat_internal() followed by
 * sort_canonical(), then compose_internal() on that intermediate. The
 * final buffer is returned as a new Ruby string. Check_Type() rejects
 * non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_normalize_KC(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  WString composed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Stage 1: decompose and reorder; the input buffer is then released. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Stage 2: compose from the reordered intermediate. */
  WStr_alloc(&composed);
  compose_internal(&decomposed, &composed);
  WStr_free(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&composed, &bytes);
  WStr_free(&composed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.normalize_KD(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 765

/*
 * Unicode.decompose_compat(str)
 *
 * Feeds str through decompose_compat_internal(), applies
 * sort_canonical() to the output, and returns the outcome as a new Ruby
 * string. Check_Type() rejects non-String arguments.
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_decompose_compat(VALUE obj, VALUE str)
{
  WString src;
  WString decomposed;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&decomposed);
  decompose_compat_internal(&src, &decomposed);
  WStr_free(&src);
  sort_canonical(&decomposed);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&decomposed, &bytes);
  WStr_free(&decomposed);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.strcmp(str1, str2) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 629

/*
 * Unicode.strcmp(str1, str2)
 *
 * Passes both strings through decompose_internal() and sort_canonical(),
 * serialises each result into a byte buffer, and compares the buffers
 * byte-wise. Returns a Fixnum that is negative, zero or positive when
 * str1 sorts before, equal to, or after str2. Check_Type() rejects
 * non-String arguments.
 *
 * The comparison is length-aware (memcmp over the common prefix, then a
 * length tie-break). A NUL-terminated strcmp() would stop at the first
 * embedded "\0" byte, which Ruby strings may legitimately contain, and
 * report such strings as equal regardless of what follows.
 *
 * NOTE(review): CONVERT_TO_UTF8 is a macro defined outside this excerpt;
 * presumably it transcodes its argument to UTF-8 -- confirm.
 */
static VALUE
unicode_strcmp(VALUE obj, VALUE str1, VALUE str2)
{
  WString wstr1;
  WString wstr2;
  WString result1;
  WString result2;
  UString ustr1;
  UString ustr2;
  long len1;
  long len2;
  long common;
  int ret = 0;

  Check_Type(str1, T_STRING);
  Check_Type(str2, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str1);
  CONVERT_TO_UTF8(str2);
#endif
  WStr_allocWithUTF8L(&wstr1, RSTRING_PTR(str1), RSTRING_LEN(str1));
  WStr_allocWithUTF8L(&wstr2, RSTRING_PTR(str2), RSTRING_LEN(str2));
  WStr_alloc(&result1);
  WStr_alloc(&result2);
  decompose_internal(&wstr1, &result1);
  decompose_internal(&wstr2, &result2);
  WStr_free(&wstr1);
  WStr_free(&wstr2);
  sort_canonical(&result1);
  sort_canonical(&result2);
  UniStr_alloc(&ustr1);
  UniStr_alloc(&ustr2);
  WStr_convertIntoUString(&result1, &ustr1);
  WStr_convertIntoUString(&result2, &ustr2);
  WStr_free(&result1);
  WStr_free(&result2);

  /* Compare the shared prefix; the shorter buffer sorts first on a tie. */
  len1 = (long)ustr1.len;
  len2 = (long)ustr2.len;
  common = (len1 < len2) ? len1 : len2;
  if (common > 0) {
    ret = memcmp(ustr1.str, ustr2.str, (size_t)common);
  }
  if (ret == 0 && len1 != len2) {
    ret = (len1 < len2) ? -1 : 1;
  }

  UniStr_free(&ustr1);
  UniStr_free(&ustr2);

  return INT2FIX(ret);
}

.strcmp_compat(str1, str2) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 671

/*
 * Unicode.strcmp_compat(str1, str2)
 *
 * Passes both strings through decompose_compat_internal() and
 * sort_canonical(), serialises each result into a byte buffer, and
 * compares the buffers byte-wise. Returns a Fixnum that is negative,
 * zero or positive when str1 sorts before, equal to, or after str2.
 * Check_Type() rejects non-String arguments.
 *
 * The comparison is length-aware (memcmp over the common prefix, then a
 * length tie-break). A NUL-terminated strcmp() would stop at the first
 * embedded "\0" byte, which Ruby strings may legitimately contain, and
 * report such strings as equal regardless of what follows.
 *
 * NOTE(review): CONVERT_TO_UTF8 is a macro defined outside this excerpt;
 * presumably it transcodes its argument to UTF-8 -- confirm.
 */
static VALUE
unicode_strcmp_compat(VALUE obj, VALUE str1, VALUE str2)
{
  WString wstr1;
  WString wstr2;
  WString result1;
  WString result2;
  UString ustr1;
  UString ustr2;
  long len1;
  long len2;
  long common;
  int ret = 0;

  Check_Type(str1, T_STRING);
  Check_Type(str2, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str1);
  CONVERT_TO_UTF8(str2);
#endif
  WStr_allocWithUTF8L(&wstr1, RSTRING_PTR(str1), RSTRING_LEN(str1));
  WStr_allocWithUTF8L(&wstr2, RSTRING_PTR(str2), RSTRING_LEN(str2));
  WStr_alloc(&result1);
  WStr_alloc(&result2);
  decompose_compat_internal(&wstr1, &result1);
  decompose_compat_internal(&wstr2, &result2);
  WStr_free(&wstr1);
  WStr_free(&wstr2);
  sort_canonical(&result1);
  sort_canonical(&result2);
  UniStr_alloc(&ustr1);
  UniStr_alloc(&ustr2);
  WStr_convertIntoUString(&result1, &ustr1);
  WStr_convertIntoUString(&result2, &ustr2);
  WStr_free(&result1);
  WStr_free(&result2);

  /* Compare the shared prefix; the shorter buffer sorts first on a tie. */
  len1 = (long)ustr1.len;
  len2 = (long)ustr2.len;
  common = (len1 < len2) ? len1 : len2;
  if (common > 0) {
    ret = memcmp(ustr1.str, ustr2.str, (size_t)common);
  }
  if (ret == 0 && len1 != len2) {
    ret = (len1 < len2) ? -1 : 1;
  }

  UniStr_free(&ustr1);
  UniStr_free(&ustr2);

  return INT2FIX(ret);
}

.text_elements(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 1215

/*
 * Unicode.text_elements(str)
 *
 * Runs get_text_elements_internal() over str and returns whatever that
 * callback returns. Check_Type() rejects non-String arguments.
 *
 * The wide-string buffer is passed to get_text_elements_ensure() through
 * rb_ensure(), so that callback runs even if the body raises; it is
 * where the buffer gets freed.
 *
 * NOTE(review): args is initialised with str before CONVERT_TO_UTF8()
 * runs; if that macro reassigns str, args keeps the original object.
 * This looks intentional -- confirm.
 */
VALUE
unicode_get_text_elements(VALUE obj, VALUE str)
{
  WString chars;
  get_text_elements_param args = { &chars, str };
  VALUE elements;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif
  WStr_allocWithUTF8L(&chars, RSTRING_PTR(str), RSTRING_LEN(str));

  /* chars is handed to get_text_elements_ensure() on every exit path. */
  elements = rb_ensure(get_text_elements_internal, (VALUE)&args,
                       get_text_elements_ensure, (VALUE)&chars);
  return elements;
}

.upcase(str) ⇒ `Object`

# File 'ext/unicode/unicode.c', line 907

/*
 * Unicode.upcase(str)
 *
 * Feeds str through upcase_internal() and returns the outcome as a new
 * Ruby string. Check_Type() rejects non-String arguments. The result is
 * not passed through sort_canonical().
 *
 * NOTE(review): CONVERT_TO_UTF8, TO_ and ENC_ are macros defined outside
 * this excerpt; presumably they transcode the input and tag the encoding
 * of the returned string -- confirm against their definitions.
 */
static VALUE
unicode_upcase(VALUE obj, VALUE str)
{
  WString src;
  WString cased;
  UString bytes;
  VALUE out;

  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif

  /* Load the input, transform it, then release the input buffer. */
  WStr_allocWithUTF8L(&src, RSTRING_PTR(str), RSTRING_LEN(str));
  WStr_alloc(&cased);
  upcase_internal(&src, &cased);
  WStr_free(&src);

  /* Serialise into a byte buffer and copy that into a Ruby string. */
  UniStr_alloc(&bytes);
  WStr_convertIntoUString(&cased, &bytes);
  WStr_free(&cased);
  out = TO_(str, ENC_(rb_str_new((char *)bytes.str, bytes.len)));
  UniStr_free(&bytes);

  return out;
}

.width(*args) ⇒ `Object`

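Returns the display width of str, or -1 if str contains a control character. An optional second argument, when truthy, makes East Asian Ambiguous characters count as two columns.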

# File 'ext/unicode/unicode.c', line 1062

/*
 * Unicode.width(str [, cjk])
 *
 * Sums a per-character column count over str and returns it as a Fixnum:
 *   - a control character (1..31 or 0x7f..0x9f) makes the whole result -1;
 *   - non-spacing/enclosing marks, format characters, NUL and
 *     0x1160..0x11ff count as 0 (SOFT HYPHEN, 0x00ad, is excluded from
 *     this rule);
 *   - Fullwidth/Wide characters and the listed reserved CJK ranges count
 *     as 2, as do East Asian Ambiguous characters when cjk is truthy;
 *   - everything else counts as 1.
 *
 * Accepts one required and one optional argument ("11"); Check_Type()
 * rejects a non-String first argument.
 */
VALUE
unicode_wcswidth(int argc, VALUE* argv, VALUE obj)
{
  WString chars;
  VALUE str;
  VALUE cjk;
  int nargs;
  int idx;
  int ambiguous_wide = 0;
  int total = 0;

  nargs = rb_scan_args(argc, argv, "11", &str, &cjk);
  if (nargs > 1) {
    ambiguous_wide = RTEST(cjk);
  }
  Check_Type(str, T_STRING);
#ifdef HAVE_RUBY_ENCODING_H
  CONVERT_TO_UTF8(str);
#endif
  WStr_allocWithUTF8L(&chars, RSTRING_PTR(str), RSTRING_LEN(str));

  for (idx = 0; idx < chars.len; idx++) {
    int c = chars.str[idx];
    int cat = get_gencat(c);
    int eaw = get_eawidth(c);
    int is_control;
    int is_zero_width;
    int is_reserved_cjk;

    /* Control characters invalidate the whole measurement. */
    is_control = (c > 0 && c < 32) || (c >= 0x7f && c < 0xa0);
    if (is_control) {
      total = -1;
      break;
    }

    /* SOFT HYPHEN is category Cf but is deliberately not zero width. */
    is_zero_width = c != 0x00ad &&
                    (cat == c_Mn || cat == c_Me ||      /* Non-spacing Marks */
                     cat == c_Cf ||                     /* Format */
                     c == 0 ||                          /* NUL */
                     (c >= 0x1160 && c <= 0x11ff));     /* HANGUL JUNGSEONG/JONGSEONG */
    if (is_zero_width) {
      continue;
    }

    /* Unassigned code points inside CJK blocks are treated as wide. */
    is_reserved_cjk = (c >= 0x4db6 && c <= 0x4dbf) ||
                      (c >= 0x9fcd && c <= 0x9fff) ||
                      (c >= 0xfa6e && c <= 0xfa6f) ||
                      (c >= 0xfada && c <= 0xfaff) ||
                      (c >= 0x2a6d7 && c <= 0x2a6ff) ||
                      (c >= 0x2b735 && c <= 0x2b73f) ||
                      (c >= 0x2b81e && c <= 0x2f7ff) ||
                      (c >= 0x2fa1e && c <= 0x2fffd) ||
                      (c >= 0x30000 && c <= 0x3fffd);

    if (eaw == w_F || eaw == w_W ||           /* Fullwidth or Wide */
        is_reserved_cjk ||
        (ambiguous_wide && eaw == w_A)) {     /* East Asian Ambiguous */
      total += 2;
    } else {
      total += 1;                             /* Halfwidth or Neutral */
    }
  }
  WStr_free(&chars);

  return INT2FIX(total);
}

Module: Unicode

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.abbr_categories(str) ⇒ Object

.capitalize(str) ⇒ Object

.categories(str) ⇒ Object

.compose(str) ⇒ Object

.decompose(str) ⇒ Object

.decompose_compat(str) ⇒ Object

.decompose_safe(str) ⇒ Object

.downcase(str) ⇒ Object

.nfc(str) ⇒ Object

.nfc_safe(str) ⇒ Object

.nfd(str) ⇒ Object

.nfd_safe(str) ⇒ Object

.nfkc(str) ⇒ Object

.nfkd(str) ⇒ Object

.normalize_C(str) ⇒ Object

.normalize_C_safe(str) ⇒ Object

.normalize_D(str) ⇒ Object

.normalize_D_safe(str) ⇒ Object

.normalize_KC(str) ⇒ Object

.normalize_KD(str) ⇒ Object

.strcmp(str1, str2) ⇒ Object

.strcmp_compat(str1, str2) ⇒ Object

.text_elements(str) ⇒ Object

.upcase(str) ⇒ Object

.width(*args) ⇒ Object

.abbr_categories(str) ⇒ `Object`

.capitalize(str) ⇒ `Object`

.categories(str) ⇒ `Object`

.compose(str) ⇒ `Object`

.decompose(str) ⇒ `Object`

.decompose_compat(str) ⇒ `Object`

.decompose_safe(str) ⇒ `Object`

.downcase(str) ⇒ `Object`

.nfc(str) ⇒ `Object`

.nfc_safe(str) ⇒ `Object`

.nfd(str) ⇒ `Object`

.nfd_safe(str) ⇒ `Object`

.nfkc(str) ⇒ `Object`

.nfkd(str) ⇒ `Object`

.normalize_C(str) ⇒ `Object`

.normalize_C_safe(str) ⇒ `Object`

.normalize_D(str) ⇒ `Object`

.normalize_D_safe(str) ⇒ `Object`

.normalize_KC(str) ⇒ `Object`

.normalize_KD(str) ⇒ `Object`

.strcmp(str1, str2) ⇒ `Object`

.strcmp_compat(str1, str2) ⇒ `Object`

.text_elements(str) ⇒ `Object`

.upcase(str) ⇒ `Object`

.width(*args) ⇒ `Object`