Module: NKF

Defined in:
nkf.c

Constant Summary collapse

AUTO =

Auto-Detect

INT2FIX(_AUTO)
JIS =

ISO-2022-JP

INT2FIX(_JIS)
EUC =

EUC-JP

INT2FIX(_EUC)
SJIS =

Shift_JIS

INT2FIX(_SJIS)
BINARY =

BINARY

INT2FIX(_BINARY)
NOCONV =

No conversion

INT2FIX(_NOCONV)
ASCII =

ASCII

INT2FIX(_ASCII)
UTF8 =

UTF-8

INT2FIX(_UTF8)
UTF16 =

UTF-16

INT2FIX(_UTF16)
UTF32 =

UTF-32

INT2FIX(_UTF32)
UNKNOWN =

UNKNOWN

INT2FIX(_UNKNOWN)
VERSION =

Full version string of nkf

rb_str_new2(RUBY_NKF_VERSION)
NKF_VERSION =

Version of nkf

rb_str_new2(NKF_VERSION)
NKF_RELEASE_DATE =

Release date of nkf

rb_str_new2(NKF_RELEASE_DATE)

Class Method Summary collapse

Class Method Details

.guess1(str) ⇒ Integer

Returns the guessed encoding of str as an integer.

Algorithm described in: Ken Lunde. 'Understanding Japanese Information Processing' Sebastopol, CA: O'Reilly & Associates.

case NKF.guess1(input)
when NKF::JIS
  "ISO-2022-JP"
when NKF::SJIS
  "Shift_JIS"
when NKF::EUC
  "EUC-JP"
when NKF::UNKNOWN
  "UNKNOWN(ASCII)"
when NKF::BINARY
  "BINARY"
end

Returns:

  • (Integer)


# File 'nkf.c'

static VALUE
rb_nkf_guess1(obj, src)
  VALUE obj, src;
{
  unsigned char *p;
  unsigned char *pend;
  int sequence_counter = 0;

  StringValue(src);
  p = (unsigned char *)RSTRING(src)->ptr;
  pend = p + RSTRING(src)->len;
  if (p == pend) return INT2FIX(_UNKNOWN);

#define INCR do {\
p++;\
if (p==pend) return INT2FIX(_UNKNOWN);\
sequence_counter++;\
if (sequence_counter % 2 == 1 && *p != 0xa4)\
    sequence_counter = 0;\
if (6 <= sequence_counter) {\
sequence_counter = 0;\
return INT2FIX(_EUC);\
}

.guess2(str) ⇒ Integer

Returns the encoding of str, as guessed by nkf's own detection routine, as an integer.

case NKF.guess2(input)
when NKF::ASCII
  "ASCII"
when NKF::JIS
  "ISO-2022-JP"
when NKF::SJIS
  "Shift_JIS"
when NKF::EUC
  "EUC-JP"
when NKF::UTF8
  "UTF-8"
when NKF::UTF16
  "UTF-16"
when NKF::UNKNOWN
  "UNKNOWN"
when NKF::BINARY
  "BINARY"
end

Returns:

  • (Integer)


# File 'nkf.c'

static VALUE
rb_nkf_guess2(obj, src)
  VALUE obj, src;
{
  int code = _BINARY;

  reinit();

  input_ctr = 0;
  StringValue(src);
  input = (unsigned char *)RSTRING(src)->ptr;
  i_len = RSTRING(src)->len;

  if(x0201_f == WISH_TRUE)
x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);

  guess_f = TRUE;
  kanji_convert( NULL );
  guess_f = FALSE;

  if (!is_inputcode_mixed) {
if (strcmp(input_codename, "") == 0) {
  code = _ASCII;
}

.nkf(opt, str) ⇒ String

Converts str and returns the converted result. The conversion details are specified by opt, given as a String of nkf options.

require 'nkf'
output = NKF.nkf("-s", input)

Note: By default, nkf decodes MIME-encoded strings. If you do not want the input to be decoded, pass the -m0 flag to NKF.nkf.

Returns:



# File 'nkf.c'

/*
 * NKF.nkf(opt, str) -> String
 *
 * Converts src according to the nkf option string opt and returns the
 * converted bytes as a newly created String.
 *
 *   obj - receiver; not used.
 *   opt - option string; coerced with StringValue() and handed to
 *         nkf_split_options().
 *   src - input to convert; coerced with StringValue().
 *
 * The conversion itself is done by kanji_convert(), which communicates
 * through variables defined outside this function (input, i_len, input_ctr,
 * output, o_len, output_ctr, result, incsize, x0201_f); this function only
 * prepares that state and finalizes the result string afterwards.
 */
static VALUE
rb_nkf_kconv(obj, opt, src)
  VALUE obj, opt, src;
{
  char *opt_ptr;
  /* NOTE(review): v is written but never read; presumably it keeps the
   * freshly allocated result referenced from the stack so the GC does not
   * collect it during kanji_convert() -- confirm before removing. */
  volatile VALUE v;

  reinit();

  /* Apply the caller's options before any input state is set up. */
  StringValue(opt);
  opt_ptr = RSTRING(opt)->ptr;
  nkf_split_options(opt_ptr);

  incsize = INCSIZE;

  /* Point the input cursor at the start of src. */
  input_ctr = 0;
  StringValue(src);
  input = (unsigned char *)RSTRING(src)->ptr;
  i_len = RSTRING(src)->len;

  /* Initial output buffer: three bytes per input byte plus 10 spare, so it
   * is never empty and the write through *output below is in bounds. */
  result = rb_str_new(0, i_len*3 + 10);
  v = result;

  output_ctr = 0;
  output     = (unsigned char *)RSTRING(result)->ptr;
  o_len      = RSTRING(result)->len;
  *output    = '\0';

  /* Resolve a still-undecided X0201 setting based on iso2022jp_f. */
  if (x0201_f == WISH_TRUE) {
    x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
  }

  kanji_convert(NULL);

  /* Terminate and shrink the string to the output_ctr bytes written.  The
   * buffer is re-read from result rather than through the cached output
   * pointer. */
  RSTRING(result)->ptr[output_ctr] = '\0';
  RSTRING(result)->len = output_ctr;
  OBJ_INFECT(result, src);   /* carry src's taint over to the result */

  return result;
}