Module: NKF

Defined in:
nkf.c

Constant Summary collapse

AUTO =
Qnil
NOCONV =
Qnil
UNKNOWN =
Qnil
BINARY =
rb_enc_from_encoding(rb_nkf_enc_get("BINARY"))
ASCII =
rb_enc_from_encoding(rb_nkf_enc_get("US-ASCII"))
JIS =
rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP"))
EUC =
rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP"))
SJIS =
rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS"))
UTF8 =
rb_enc_from_encoding(rb_utf8_encoding())
UTF16 =
rb_enc_from_encoding(rb_nkf_enc_get("UTF-16BE"))
UTF32 =
rb_enc_from_encoding(rb_nkf_enc_get("UTF-32BE"))
VERSION =

Full version string of nkf

rb_str_new2(RUBY_NKF_VERSION)
NKF_VERSION =

Version of nkf

rb_str_new2(NKF_VERSION)
NKF_RELEASE_DATE =

Release date of nkf

rb_str_new2(NKF_RELEASE_DATE)

Class Method Summary collapse

Class Method Details

.guess(str) ⇒ Encoding

Returns the encoding of str as guessed by the nkf routine.

Returns:

  • (Encoding)


192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# File 'nkf.c', line 192

/*
 * Implements NKF.guess(str).
 *
 * Coerces src to a String, points the nkf input globals at its bytes and
 * runs kanji_convert() with guess_f set, then returns the Encoding object
 * looked up from the name reported by get_guessed_code().
 *
 * obj is the method receiver and is not used.
 */
static VALUE
rb_nkf_guess(VALUE obj, VALUE src)
{
    /* NOTE(review): presumably resets nkf's global option/state variables
     * left over from a previous call -- confirm against reinit(). */
    reinit();

    /* Describe the input buffer through the file-level globals. */
    input_ctr = 0;
    StringValue(src);
    input = (unsigned char *)RSTRING_PTR(src);
    i_len = RSTRING_LENINT(src);

    /* guess_f is raised only for the duration of this kanji_convert() call. */
    guess_f = TRUE;
    kanji_convert( NULL );
    guess_f = FALSE;

    return rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code()));
}

.nkf(opt, str) ⇒ String

Converts str and returns the converted result. Conversion details are specified by opt, given as a String.

require 'nkf'
output = NKF.nkf("-s", input)

Returns:

  • (String)


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'nkf.c', line 135

/*
 * Implements NKF.nkf(opt, str).
 *
 * opt is coerced to a String and handed to nkf_split_options(); an
 * ArgumentError is raised if no output encoding is set afterwards.
 * src is coerced to a String and run through kanji_convert(). The result
 * is returned as a new String associated with US-ASCII when mimeout_f is
 * set, and with the selected output encoding otherwise.
 *
 * obj is the method receiver and is not used.
 */
static VALUE
rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
{
    VALUE converted;

    reinit();

    /* Parse the option string; it has to select an output encoding. */
    StringValue(opt);
    nkf_split_options(RSTRING_PTR(opt));
    if (!output_encoding) {
        rb_raise(rb_eArgError, "no output encoding given");
    }

    /* Swap each BOM-carrying encoding for its BOM-less counterpart and
     * clear the BOM output flag. */
    switch (nkf_enc_to_index(output_encoding)) {
    case UTF_8_BOM:
        output_encoding = nkf_enc_from_index(UTF_8);
        break;
    case UTF_16BE_BOM:
        output_encoding = nkf_enc_from_index(UTF_16BE);
        break;
    case UTF_16LE_BOM:
        output_encoding = nkf_enc_from_index(UTF_16LE);
        break;
    case UTF_32BE_BOM:
        output_encoding = nkf_enc_from_index(UTF_32BE);
        break;
    case UTF_32LE_BOM:
        output_encoding = nkf_enc_from_index(UTF_32LE);
        break;
    default:
        break;
    }
    output_bom_f = FALSE;

    incsize = INCSIZE;

    /* Describe the input buffer through the file-level globals. */
    input_ctr = 0;
    StringValue(src);
    input = (unsigned char *)RSTRING_PTR(src);
    i_len = RSTRING_LENINT(src);

    /* Initial output buffer: three bytes per input byte plus ten spare. */
    converted = rb_str_new(NULL, i_len*3 + 10);
    output     = (unsigned char *)RSTRING_PTR(converted);
    o_len      = RSTRING_LENINT(converted);
    output_ctr = 0;
    *output    = '\0';

    /* The global `result` refers to the output string only while
     * kanji_convert() runs. */
    result = converted;
    kanji_convert(NULL);
    result = Qnil;

    /* Set the final length from output_ctr and carry over src's taint. */
    rb_str_set_len(converted, output_ctr);
    OBJ_INFECT(converted, src);

    rb_enc_associate(converted,
                     mimeout_f ? rb_usascii_encoding()
                               : rb_nkf_enc_get(nkf_enc_name(output_encoding)));

    return converted;
}