Module: NKF
- Defined in:
- nkf.c
Constant Summary collapse
- AUTO =
Auto-Detect
INT2FIX(_AUTO)
- JIS =
ISO-2022-JP
INT2FIX(_JIS)
- EUC =
EUC-JP
INT2FIX(_EUC)
- SJIS =
Shift_JIS
INT2FIX(_SJIS)
- BINARY =
BINARY
INT2FIX(_BINARY)
- NOCONV =
No conversion
INT2FIX(_NOCONV)
- ASCII =
ASCII
INT2FIX(_ASCII)
- UTF8 =
UTF-8
INT2FIX(_UTF8)
- UTF16 =
UTF-16
INT2FIX(_UTF16)
- UTF32 =
UTF-32
INT2FIX(_UTF32)
- UNKNOWN =
UNKNOWN
INT2FIX(_UNKNOWN)
- VERSION =
Full version string of nkf
rb_str_new2(RUBY_NKF_VERSION)
- NKF_VERSION =
Version of nkf
rb_str_new2(NKF_VERSION)
- NKF_RELEASE_DATE =
Release date of nkf
rb_str_new2(NKF_RELEASE_DATE)
Class Method Summary collapse
-
.guess1(str) ⇒ Integer
Returns guessed encoding of str as integer.
-
.guess2(str) ⇒ Integer
Returns guessed encoding of str as integer by nkf routine.
-
.nkf(opt, str) ⇒ String
Convert str and return converted result.
Class Method Details
.guess1(str) ⇒ Integer
Returns guessed encoding of str as integer.
Algorithm described in: Ken Lunde. 'Understanding Japanese Information Processing' Sebastopol, CA: O'Reilly & Associates.
case NKF.guess1(input)
when NKF::JIS
"ISO-2022-JP"
when NKF::SJIS
"Shift_JIS"
when NKF::EUC
"EUC-JP"
when NKF::UNKNOWN
"UNKNOWN(ASCII)"
when NKF::BINARY
"BINARY"
end
|
# File 'nkf.c'
/*
 * NKF.guess1(str) -> Integer
 *
 * Scans the bytes of src and returns one of the encoding constants as a
 * Fixnum (this excerpt shows returns of _UNKNOWN and _EUC).
 *
 * obj is the method receiver and is not used in the visible code.
 * An empty string yields _UNKNOWN immediately.
 *
 * NOTE(review): only the opening of the function is visible in this
 * excerpt; the listing stops inside the INCR macro definition.
 */
static VALUE
rb_nkf_guess1(obj, src)
VALUE obj, src;
{
/* p walks the string's bytes; pend is one past the last byte. */
unsigned char *p;
unsigned char *pend;
/* Running count used by INCR; reaching 6 makes INCR return _EUC. */
int sequence_counter = 0;
/* Coerce src to a String before touching its buffer. */
StringValue(src);
p = (unsigned char *)RSTRING(src)->ptr;
pend = p + RSTRING(src)->len;
/* Nothing to inspect in an empty string. */
if (p == pend) return INT2FIX(_UNKNOWN);
/*
 * INCR: advance p by one byte.
 *  - Returns _UNKNOWN from the enclosing function when the end is reached.
 *  - Increments sequence_counter, then resets it to 0 when the counter is
 *    odd and the current byte is not 0xa4 (presumably the EUC-JP lead byte
 *    of the hiragana row -- confirm).
 *  - Returns _EUC from the enclosing function once the counter reaches 6.
 */
#define INCR do {\
p++;\
if (p==pend) return INT2FIX(_UNKNOWN);\
sequence_counter++;\
if (sequence_counter % 2 == 1 && *p != 0xa4)\
sequence_counter = 0;\
if (6 <= sequence_counter) {\
sequence_counter = 0;\
return INT2FIX(_EUC);\
}
|
.guess2(str) ⇒ Integer
Returns guessed encoding of str as integer by nkf routine.
case NKF.guess2(input)
when NKF::ASCII
"ASCII"
when NKF::JIS
"ISO-2022-JP"
when NKF::SJIS
"Shift_JIS"
when NKF::EUC
"EUC-JP"
when NKF::UTF8
"UTF-8"
when NKF::UTF16
"UTF-16"
when NKF::UNKNOWN
"UNKNOWN"
when NKF::BINARY
"BINARY"
end
|
# File 'nkf.c'
/*
 * NKF.guess2(str) -> Integer
 *
 * Runs the nkf converter over src in guess mode (guess_f set) and derives an
 * encoding constant from what the converter detected.
 *
 * obj is the method receiver and is not used in the visible code.
 *
 * NOTE(review): only the opening of the function is visible in this
 * excerpt; the listing stops inside the "not mixed" branch, before any
 * return statement.
 */
static VALUE
rb_nkf_guess2(obj, src)
VALUE obj, src;
{
/* Result defaults to _BINARY unless a branch below overrides it. */
int code = _BINARY;
/* presumably resets the converter's option/state variables -- confirm */
reinit();
/* Point the converter's input cursor at the start of src's bytes. */
input_ctr = 0;
StringValue(src);
input = (unsigned char *)RSTRING(src)->ptr;
i_len = RSTRING(src)->len;
/* Resolve the "wish" setting of x0201_f to a concrete value based on iso2022jp_f. */
if(x0201_f == WISH_TRUE)
x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
/* guess_f is raised only for the duration of this kanji_convert call. */
guess_f = TRUE;
kanji_convert( NULL );
guess_f = FALSE;
/* Only classify further when the input was not flagged as mixed. */
if (!is_inputcode_mixed) {
/* An empty detected code name is reported as _ASCII. */
if (strcmp(input_codename, "") == 0) {
code = _ASCII;
}
|
.nkf(opt, str) ⇒ String
Converts str and returns the converted result. The conversion is controlled by opt, a String of nkf option flags.
require 'nkf'
output = NKF.nkf("-s", input)
Note: By default, nkf decodes MIME-encoded strings. If you do not want the input to be decoded, call NKF.nkf with the -m0 flag.
|
# File 'nkf.c'
/*
 * NKF.nkf(opt, str) -> String
 *
 * Converts src according to the nkf option string opt and returns the
 * converted bytes as a String.
 *
 *   obj - method receiver; not used.
 *   opt - option flags; coerced with StringValue and handed to
 *         nkf_split_options().
 *   src - input text; coerced with StringValue.
 *
 * The function communicates with the converter through variables that are
 * not declared in this block (input, input_ctr, i_len, output, output_ctr,
 * o_len, incsize, result, x0201_f, iso2022jp_f), so the statement order
 * below matters.
 */
static VALUE
rb_nkf_kconv(obj, opt, src)
VALUE obj, opt, src;
{
    char *opt_ptr;
    /* presumably keeps a reference to result visible to the GC -- confirm */
    volatile VALUE v;

    /* presumably resets the converter's option/state variables -- confirm */
    reinit();

    /* Parse the option string before any input is set up. */
    StringValue(opt);
    opt_ptr = RSTRING(opt)->ptr;
    nkf_split_options(opt_ptr);

    incsize = INCSIZE;

    /* Point the converter's input cursor at the start of src's bytes. */
    input_ctr = 0;
    StringValue(src);
    input = (unsigned char *)RSTRING(src)->ptr;
    i_len = RSTRING(src)->len;

    /* Initial output buffer: three bytes per input byte plus 10 spare. */
    result = rb_str_new(0, i_len*3 + 10);
    v = result;

    output_ctr = 0;
    output = (unsigned char *)RSTRING(result)->ptr;
    o_len = RSTRING(result)->len;
    *output = '\0';

    /* Resolve the "wish" setting of x0201_f to a concrete value based on iso2022jp_f. */
    if (x0201_f == WISH_TRUE)
        x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);

    kanji_convert(NULL);

    /*
     * Terminate and trim result to the number of bytes produced. The
     * buffer pointer is re-read from result rather than reusing the
     * pointer captured before the conversion.
     */
    RSTRING(result)->ptr[output_ctr] = '\0';
    RSTRING(result)->len = output_ctr;

    /* Carry src's taint state over to the returned string. */
    OBJ_INFECT(result, src);

    return result;
}
|