Class: Iconv

Inherits:
Data
  • Object
show all
Defined in:
ext/iconv/iconv.c,
lib/iconv.rb,
lib/iconv/version.rb,
ext/iconv/iconv.c

Overview

Summary

Ruby extension for charset conversion.

Abstract

Iconv is a wrapper class for the UNIX 95 iconv() function family, which translates strings between various encoding systems.

See Open Group’s on-line documents for more details.

Which coding systems are available is platform-dependent.

Examples

  1. Simple conversion between two charsets.

    converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
    
  2. Instantiate a new Iconv and use method Iconv#iconv.

    cd = Iconv.new(to, from)
    begin
      input.each { |s| output << cd.iconv(s) }
      output << cd.iconv(nil)                   # Don't forget this!
    ensure
      cd.close
    end
    
  3. Invoke Iconv.open with a block.

    Iconv.open(to, from) do |cd|
      input.each { |s| output << cd.iconv(s) }
      output << cd.iconv(nil)
    end
    
  4. Shorthand for (3).

    Iconv.iconv(to, from, *input.to_a)
    

Attentions

Even if some implementation-dependent extensions are useful, DON’T USE those extensions in libraries and scripts that are meant to be widely distributed. If you want to use those features, use String#encode.

Defined Under Namespace

Modules: Failure Classes: BrokenLibrary, IllegalSequence, InvalidCharacter, InvalidEncoding, OutOfRange

Constant Summary collapse

VERSION =
"1.0.3"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#new(to, from, [options]) ⇒ Object

Creates a new code converter from the coding system designated by from to the one designated by to.

Parameters

to

encoding name for destination

from

encoding name for source

options

options for converter

Exceptions

TypeError

if to or from isn’t a String

InvalidEncoding

if the designated converter couldn’t be found

SystemCallError

if iconv_open(3) fails



721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
# File 'ext/iconv/iconv.c', line 721

/*
 * Implements Iconv#initialize.
 *
 * Takes two required arguments (to, from) and one optional argument
 * (options), as given by the "21" scan spec.  Any handle currently held
 * by self is released, a new converter is created and stored in self,
 * and self is returned.
 */
static VALUE
iconv_initialize(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    VALUE handle;
    struct rb_iconv_opt_t opt;
    int enc_idx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);

    /* Release the current handle and clear the slot *before* creating
     * the replacement, so the slot never refers to a freed handle while
     * iconv_create() runs. */
    iconv_free(check_iconv(self));
    DATA_PTR(self) = NULL;

    handle = ICONV2VALUE(iconv_create(to, from, &opt, &enc_idx));
    DATA_PTR(self) = (void *)handle;
#ifdef HAVE_RUBY_ENCODING_H
    /* A negative index means there is no encoding to attach to self. */
    if (enc_idx >= 0) {
	ENCODING_SET(self, enc_idx);
    }
#endif
    return self;
}

Class Method Details

.charset_mapObject

Returns the map from canonical name to system dependent name.



192
193
194
195
196
# File 'ext/iconv/iconv.c', line 192

/*
 * Implements Iconv.charset_map: returns the file-level charset_map
 * object as-is (no copy is made here).
 */
static VALUE
charset_map_get(void)
{
    VALUE map = charset_map;

    return map;
}

.conv(to, from, str) ⇒ Object

Shorthand for

Iconv.iconv(to, from, str).join

See Iconv.iconv.



833
834
835
836
837
838
839
840
841
842
843
844
# File 'ext/iconv/iconv.c', line 833

/*
 * Implements Iconv.conv(to, from, str).
 *
 * Converts the single string `str' and accumulates the output into one
 * String (rb_str_append onto an initially empty String).  The converter
 * created here is released through rb_ensure() whether or not the
 * conversion raises.
 */
static VALUE
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
{
    struct iconv_env_t env;

    /* Exactly one input: the caller's string. */
    env.argv = &str;
    env.argc = 1;

    /* Results are appended to a fresh, empty String. */
    env.ret = rb_str_new(0, 0);
    env.append = rb_str_append;

    /* Created last, so that from here on only rb_ensure() owns cleanup. */
    env.cd = iconv_create(to, from, NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, ICONV2VALUE(env.cd));
}

.ctlmethodsArray

Returns available iconvctl() method list.

Returns:

  • (Array)


1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
# File 'ext/iconv/iconv.c', line 1159

/*
 * Implements Iconv.ctlmethods.
 *
 * Builds a new Array of Symbols, one per iconvctl() request macro that
 * was defined at compile time.  The Array is empty when none of the
 * macros below are defined.
 */
static VALUE
iconv_s_ctlmethods(VALUE klass)
{
    VALUE names = rb_ary_new();

    /* Each entry is compiled in only if its request macro exists. */
#ifdef ICONV_TRIVIALP
    rb_ary_push(names, ID2SYM(rb_intern("trivial?")));
#endif
#ifdef ICONV_GET_TRANSLITERATE
    rb_ary_push(names, ID2SYM(rb_intern("transliterate?")));
#endif
#ifdef ICONV_SET_TRANSLITERATE
    rb_ary_push(names, ID2SYM(rb_intern("transliterate=")));
#endif
#ifdef ICONV_GET_DISCARD_ILSEQ
    rb_ary_push(names, ID2SYM(rb_intern("discard_ilseq?")));
#endif
#ifdef ICONV_SET_DISCARD_ILSEQ
    rb_ary_push(names, ID2SYM(rb_intern("discard_ilseq=")));
#endif

    return names;
}

.iconv(to, from, *strs) ⇒ Object

Shorthand for

Iconv.open(to, from) { |cd|
  (strs + [nil]).collect { |s| cd.iconv(s) }
}

Parameters

to, from

see Iconv.new

strs

strings to be converted

Exceptions

Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv.



809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
# File 'ext/iconv/iconv.c', line 809

/*
 * Implements Iconv.iconv(to, from, *strs).
 *
 * argv[0] and argv[1] are the `to' and `from' encodings; every remaining
 * argument is an input to convert.  Results are collected into an Array
 * via rb_ary_push.  The converter created here is released through
 * rb_ensure() whether or not the conversion raises.
 *
 * Raises ArgumentError when fewer than two arguments are supplied.
 */
static VALUE
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
{
    struct iconv_env_t env;
    int nstrs;

    /* `to' and `from' are mandatory */
    if (argc < 2) {
	rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
    }

    /* Everything after the two encoding names is input. */
    nstrs = argc - 2;
    env.argc = nstrs;
    env.argv = argv + 2;
    env.append = rb_ary_push;
    env.ret = rb_ary_new2(nstrs);
    env.cd = iconv_create(argv[0], argv[1], NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, ICONV2VALUE(env.cd));
}

.list {|*aliases| ... } ⇒ Object

Iterates over each alias set.

Yields:

  • (*aliases)


893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
# File 'ext/iconv/iconv.c', line 893

/*
 * Implements Iconv.list.
 *
 * HAVE_ICONVLIST build:
 *   args[1] carries the result Array, or 0 when a block was given;
 *   args[0] is read back after iconvlist() as a jump state.  A non-zero
 *   state is re-raised with rb_jump_tag().  Without a block the Array is
 *   returned; with a block the function falls through and returns nil.
 *
 * HAVE___ICONV_FREE_LIST build:
 *   the names from __iconv_get_list() are copied into an Array, the
 *   native list is freed, and then either the Array is returned (no
 *   block) or each name is yielded and nil is returned.  nil is also
 *   returned when __iconv_get_list() reports failure.
 *
 * With neither macro defined the function simply returns nil.
 */
static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /* Start from "no pending jump".  Without this, args[0] would be read
     * uninitialised below whenever the list_iconv callback (defined
     * elsewhere) is never invoked or does not store a state. */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = (int)args[0];
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* Copy into Ruby strings first so the native list can be freed
     * before any block is run. */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
	rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
	return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
	rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}

.open(to, from) {|iconv| ... } ⇒ Object

Equivalent to Iconv.new, except that when it is called with a block it yields the new instance, closes it afterwards, and returns the value returned by the block.

Yields:



747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
# File 'ext/iconv/iconv.c', line 747

/*
 * Implements Iconv.open(to, from, [options]).
 *
 * Creates a converter and wraps it in a new object of the receiving
 * class.  Without a block the new object is returned.  With a block the
 * object is yielded, iconv_finish() is run on it afterwards through
 * rb_ensure() (i.e. also when the block raises), and the rb_ensure()
 * result is returned.
 */
static VALUE
iconv_s_open(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options, handle;
    struct rb_iconv_opt_t opt;
    int enc_idx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);
    handle = ICONV2VALUE(iconv_create(to, from, &opt, &enc_idx));

    /* From here on `self' is the freshly wrapped instance, not the class. */
    self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)handle);
#ifdef HAVE_RUBY_ENCODING_H
    if (enc_idx >= 0) {
	ENCODING_SET(self, enc_idx);
    }
#endif

    if (!rb_block_given_p()) {
	return self;
    }
    return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
}

Instance Method Details

#closeObject

Finishes conversion.

After calling this, calling Iconv#iconv will cause an exception, but multiple calls of #close are guaranteed to end successfully.

Returns a string containing the byte sequence to change the output buffer to its initial shift state.



949
950
951
952
953
954
955
956
# File 'ext/iconv/iconv.c', line 949

/*
 * Implements Iconv#close.
 *
 * When check_iconv() yields no handle (a zero value) there is nothing
 * left to finish and nil is returned.  Otherwise iconv_init_state() is
 * run on self and the handle is released through rb_ensure(), so the
 * release happens even if iconv_init_state() raises.
 */
static VALUE
iconv_finish(VALUE self)
{
    VALUE handle = check_iconv(self);

    if (handle) {
	return rb_ensure(iconv_init_state, self, iconv_free, handle);
    }
    return Qnil;
}

#conv(str...) ⇒ Object

Equivalent to

iconv(nil, str..., nil).join


1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
# File 'ext/iconv/iconv.c', line 1038

/*
 * Implements Iconv#conv(str...).
 *
 * Always begins with a nil conversion, whose output seeds the result
 * String.  When at least one argument is given, each argument is
 * converted in order and any non-empty output is appended, followed by
 * one more nil conversion whose non-empty output is appended as well.
 * With no arguments only the initial nil conversion is performed.
 */
static VALUE
iconv_conv(int argc, VALUE *argv, VALUE self)
{
    iconv_t cd = VALUE2ICONV(check_iconv(self));
    int toidx = ENCODING_GET(self);
    VALUE result, piece;
    int i;

    result = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (argc <= 0) {
	return result;
    }

    for (i = 0; i < argc; i++) {
	piece = iconv_convert(cd, argv[i], 0, -1, toidx, NULL);
	if (RSTRING_LEN(piece)) {
	    rb_str_buf_append(result, piece);
	}
    }

    /* trailing nil conversion, done only when there was input */
    piece = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (RSTRING_LEN(piece)) {
	rb_str_buf_append(result, piece);
    }

    return result;
}

#discard_ilseq=(flag) ⇒ Object

Sets discard_ilseq flag.



1142
1143
1144
1145
1146
1147
1148
# File 'ext/iconv/iconv.c', line 1142

/*
 * Implements Iconv#discard_ilseq=(flag).
 *
 * Reduces the Ruby argument to a C truth value with RTEST and issues an
 * ICONV_SET_DISCARD_ILSEQ request through iconv_ctl.  Returns self.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    int enabled = RTEST(discard_ilseq);

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enabled);
    return self;
}

#discard_ilseq?Boolean

Returns discard_ilseq flag.

Returns:

  • (Boolean)


1123
1124
1125
1126
1127
1128
1129
1130
# File 'ext/iconv/iconv.c', line 1123

/*
 * Implements Iconv#discard_ilseq?.
 *
 * Issues an ICONV_GET_DISCARD_ILSEQ request through iconv_ctl and
 * returns true when the flag variable ends up non-zero, false otherwise.
 * NOTE(review): iconv_ctl is defined elsewhere; presumably it stores the
 * queried value into `enabled' -- confirm.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, enabled);
    return enabled ? Qtrue : Qfalse;
}

#iconv(str, start = 0, length = -1) ⇒ Object

Converts string and returns the result.

  • If str is a String, converts str[start, length] and returns the converted string.

  • If str is nil, places converter itself into initial shift state and just returns a string containing the byte sequence to change the output buffer to its initial shift state.

  • Otherwise, raises an exception.

Parameters

str

string to be converted, or nil

start

starting offset

length

conversion length; nil or -1 means the whole string from start

Exceptions

  • IconvIllegalSequence

  • IconvInvalidCharacter

  • IconvOutOfRange

Examples

See the Iconv documentation.



985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
# File 'ext/iconv/iconv.c', line 985

/*
 * Implements Iconv#iconv(str, start = 0, length = -1).
 *
 * str    - String to convert, or nil.
 * n1     - optional start offset; in the two-argument form it is first
 *          offered to rb_range_beg_len(), so it may also be a Range.
 * n2     - optional length; nil is treated as -1.
 *
 * start/length are first worked out against slen (rb_str_length() when
 * HAVE_RUBY_ENCODING_H is defined, RSTRING_LEN() otherwise; 0 for nil).
 * If the start offset falls outside the string, length is left at 0.
 * When HAVE_RUBY_ENCODING_H is defined, positive values are then
 * translated to byte offsets with rb_enc_nth(); otherwise they are
 * clamped to slen.  The result of iconv_convert() is returned.
 */
static VALUE
iconv_iconv(int argc, VALUE *argv, VALUE self)
{
    VALUE str, n1, n2;
    VALUE cd = check_iconv(self);
    long start = 0, length = 0, slen = 0;

    rb_scan_args(argc, argv, "12", &str, &n1, &n2);
    if (!NIL_P(str)) {
#ifdef HAVE_RUBY_ENCODING_H
	VALUE n = rb_str_length(StringValue(str));
	slen = NUM2LONG(n);
#else
	slen = RSTRING_LEN(StringValue(str));
#endif
    }
    /* With exactly two arguments n1 is tried as a Range first; any other
     * arity, or a non-Range n1, takes the explicit start/length path. */
    if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
	/* A negative start counts from the end.  length is only assigned
	 * when the resulting start lies inside the string. */
	if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
	    length = NIL_P(n2) ? -1 : NUM2LONG(n2);
	}
    }
    if (start > 0 || length > 0) {
#ifdef HAVE_RUBY_ENCODING_H
	if (NIL_P(str)) {
	    /* nil has no byte buffer: RSTRING_PTR()/rb_enc_get() must not
	     * be applied to it.  Clamp exactly as the non-encoding branch
	     * does (slen is 0 here) so both builds agree. */
	    if (start > slen) {
		start = slen;
	    }
	    if (length > slen - start) {
		length = slen - start;
	    }
	}
	else {
	    const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
	    const char *ps = s;
	    rb_encoding *enc = rb_enc_get(str);
	    /* Convert the start offset and the length to byte counts. */
	    if (start > 0) {
		start = (ps = rb_enc_nth(s, e, start, enc)) - s;
	    }
	    if (length > 0) {
		length = rb_enc_nth(ps, e, length, enc) - ps;
	    }
	}
#else
	if (start > slen) {
	    start = slen;
	}
	if (length > slen - start) {
	    length = slen - start;
	}
#endif
    }

    return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
}

#transliterate=(flag) ⇒ Object

Sets transliterate flag.



1105
1106
1107
1108
1109
1110
1111
# File 'ext/iconv/iconv.c', line 1105

/*
 * Implements Iconv#transliterate=(flag).
 *
 * Reduces the Ruby argument to a C truth value with RTEST and issues an
 * ICONV_SET_TRANSLITERATE request through iconv_ctl.  Returns self.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    int enabled = RTEST(transliterate);

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enabled);
    return self;
}

#transliterate?Boolean

Returns transliterate flag.

Returns:

  • (Boolean)


1086
1087
1088
1089
1090
1091
1092
1093
# File 'ext/iconv/iconv.c', line 1086

/*
 * Implements Iconv#transliterate?.
 *
 * Issues an ICONV_GET_TRANSLITERATE request through iconv_ctl and
 * returns true when the flag variable ends up non-zero, false otherwise.
 * NOTE(review): iconv_ctl is defined elsewhere; presumably it stores the
 * queried value into `enabled' -- confirm.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, enabled);
    return enabled ? Qtrue : Qfalse;
}

#trivial?Boolean

Returns trivial flag.

Returns:

  • (Boolean)


1067
1068
1069
1070
1071
1072
1073
1074
# File 'ext/iconv/iconv.c', line 1067

/*
 * Implements Iconv#trivial?.
 *
 * Issues an ICONV_TRIVIALP request through iconv_ctl and returns true
 * when the flag variable ends up non-zero, false otherwise.
 * NOTE(review): iconv_ctl is defined elsewhere; presumably it stores the
 * queried value into `is_trivial' -- confirm.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int is_trivial = 0;

    iconv_ctl(self, ICONV_TRIVIALP, is_trivial);
    return is_trivial ? Qtrue : Qfalse;
}