Class: Iconv
- Inherits:
-
Object
- Object
- Iconv
- Defined in:
- ext/iconv/iconv.c,
lib/iconv.rb,
lib/iconv/version.rb,
ext/iconv/iconv.c
Overview
Summary
Ruby extension for charset conversion.
Abstract
Iconv is a wrapper class for the UNIX 95 iconv()
function family, which translates strings between various encoding systems.
See Open Group’s on-line documents for more details.
-
iconv.h
: www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html -
iconv_open()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html -
iconv()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv.html -
iconv_close()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
Which coding systems are available is platform-dependent.
Examples
-
Simple conversion between two charsets.
converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
-
Instantiate a new Iconv and use method Iconv#iconv.
cd = Iconv.new(to, from)
begin
  input.each { |s| output << cd.iconv(s) }
  output << cd.iconv(nil) # Don't forget this!
ensure
  cd.close
end
-
Invoke Iconv.open with a block.
Iconv.open(to, from) do |cd|
  input.each { |s| output << cd.iconv(s) }
  output << cd.iconv(nil)
end
-
Shorthand for (3).
Iconv.iconv(to, from, *input.to_a)
Attentions
Even if some implementation-dependent extensions are useful, DON’T USE them in libraries and scripts that are meant to be widely distributed. If you want to use those features, use String#encode.
Defined Under Namespace
Modules: Failure Classes: BrokenLibrary, IllegalSequence, InvalidCharacter, InvalidEncoding, OutOfRange
Constant Summary collapse
- VERSION =
"1.0.5"
Class Method Summary collapse
-
.charset_map ⇒ Object
Returns the map from canonical name to system dependent name.
-
.conv(to, from, str) ⇒ Object
Shorthand for Iconv.iconv(to, from, str).join. See Iconv.iconv.
-
.ctlmethods ⇒ Array
Returns available iconvctl() method list.
-
.iconv(to, from, *strs) ⇒ Object
Shorthand for Iconv.open(to, from) { |cd| (strs + [nil]).collect { |s| cd.iconv(s) } }.
-
.list {|*aliases| ... } ⇒ Object
Iterates over each alias set.
-
.open(to, from) {|iconv| ... } ⇒ Object
Equivalent to Iconv.new except that when it is called with a block, it yields the new instance, closes it afterwards, and returns the result of the block.
Instance Method Summary collapse
-
#close ⇒ Object
Finishes conversion.
-
#conv(str...) ⇒ Object
Equivalent to iconv(nil, str..., nil).join.
-
#discard_ilseq=(flag) ⇒ Object
Sets discard_ilseq flag.
-
#discard_ilseq? ⇒ Boolean
Returns discard_ilseq flag.
-
#iconv(str, start = 0, length = -1) ⇒ Object
Converts string and returns the result.
-
#new(to, from, [options]) ⇒ Object
constructor
Creates a new code converter from the coding system designated with
from
to another one designated with to.
-
#transliterate=(flag) ⇒ Object
Sets transliterate flag.
-
#transliterate? ⇒ Boolean
Returns transliterate flag.
-
#trivial? ⇒ Boolean
Returns trivial flag.
Constructor Details
#new(to, from, [options]) ⇒ Object
Creates a new code converter from the coding system designated with from to another one designated with to.
Parameters
to
-
encoding name for destination
from
-
encoding name for source
options
-
options for converter
Exceptions
- TypeError
-
if
to
or from
aren’t Strings
- InvalidEncoding
-
if the designated converter could not be found
- SystemCallError
-
if
iconv_open(3)
fails
724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 |
# File 'ext/iconv/iconv.c', line 724
/*
 * Iconv#initialize(to, from, [options])
 *
 * Parses two mandatory arguments and one optional argument, releases the
 * conversion descriptor currently attached to +self+, and attaches a new
 * one obtained from iconv_create().  Returns +self+.
 */
static VALUE
iconv_initialize(int argc, VALUE *argv, VALUE self)
{
    struct rb_iconv_opt_t conv_opt;
    VALUE to_name, from_name, opt_arg;
    VALUE previous, created;
    int enc_idx;

    rb_scan_args(argc, argv, "21", &to_name, &from_name, &opt_arg);
    get_iconv_opt(&conv_opt, opt_arg);

    /* Drop the old descriptor and clear the slot *before* creating the new
     * one; presumably so that self never keeps a freed descriptor if
     * iconv_create() raises. */
    previous = check_iconv(self);
    iconv_free(previous);
    DATA_PTR(self) = NULL;

    created = ICONV2VALUE(iconv_create(to_name, from_name, &conv_opt, &enc_idx));
    DATA_PTR(self) = (void *)created;
#ifdef HAVE_RUBY_ENCODING_H
    /* A negative index means iconv_create() reported no encoding to tag. */
    if (enc_idx >= 0) {
        ENCODING_SET(self, enc_idx);
    }
#endif
    return self;
}
|
Class Method Details
.charset_map ⇒ Object
Returns the map from canonical name to system dependent name.
194 195 196 197 198 |
# File 'ext/iconv/iconv.c', line 194
/*
 * Iconv.charset_map
 *
 * Accessor that hands back the file-level charset_map object unchanged.
 */
static VALUE
charset_map_get(void)
{
    VALUE map = charset_map;

    return map;
}
|
.conv(to, from, str) ⇒ Object
836 837 838 839 840 841 842 843 844 845 846 847 |
# File 'ext/iconv/iconv.c', line 836
/*
 * Iconv.conv(to, from, str)
 *
 * Runs iconv_s_convert() over the single string +str+, collecting the
 * output into a fresh String via rb_str_append.  The descriptor created
 * here is passed to iconv_free() through rb_ensure(), so it is released
 * whether or not the conversion raises.
 */
static VALUE
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
{
    struct iconv_env_t env;

    /* Exactly one input: the caller-supplied string. */
    env.argv = &str;
    env.argc = 1;
    env.append = rb_str_append;

    /* Allocate the result before opening the descriptor, as the original
     * does, so the relative order of the two calls is unchanged. */
    env.ret = rb_str_new(0, 0);
    env.cd = iconv_create(to, from, NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, ICONV2VALUE(env.cd));
}
|
.ctlmethods ⇒ Array
Returns available iconvctl() method list.
1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 |
# File 'ext/iconv/iconv.c', line 1162
/*
 * Iconv.ctlmethods
 *
 * Builds an Array of Symbols, one per iconvctl-related request macro that
 * was defined at compile time.  The array is empty when none of them are.
 */
static VALUE
iconv_s_ctlmethods(VALUE klass)
{
    VALUE supported = rb_ary_new();

#ifdef ICONV_TRIVIALP
    rb_ary_push(supported, ID2SYM(rb_intern("trivial?")));
#endif
#ifdef ICONV_GET_TRANSLITERATE
    rb_ary_push(supported, ID2SYM(rb_intern("transliterate?")));
#endif
#ifdef ICONV_SET_TRANSLITERATE
    rb_ary_push(supported, ID2SYM(rb_intern("transliterate=")));
#endif
#ifdef ICONV_GET_DISCARD_ILSEQ
    rb_ary_push(supported, ID2SYM(rb_intern("discard_ilseq?")));
#endif
#ifdef ICONV_SET_DISCARD_ILSEQ
    rb_ary_push(supported, ID2SYM(rb_intern("discard_ilseq=")));
#endif

    return supported;
}
|
.iconv(to, from, *strs) ⇒ Object
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 |
# File 'ext/iconv/iconv.c', line 812
/*
 * Iconv.iconv(to, from, *strs)
 *
 * argv[0] and argv[1] name the destination and source encodings; every
 * remaining argument is handed to iconv_s_convert(), whose results are
 * gathered into an Array via rb_ary_push.  The descriptor is passed to
 * iconv_free() through rb_ensure().
 *
 * Raises ArgumentError when fewer than two arguments are given.
 */
static VALUE
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
{
    struct iconv_env_t env;

    if (argc < 2) { /* needs `to' and `from' arguments at least */
        rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
    }

    /* Skip past the two encoding names; what is left are the inputs. */
    argc -= 2;
    env.argc = argc;
    env.argv = argv + 2;
    env.append = rb_ary_push;
    env.ret = rb_ary_new2(argc);
    env.cd = iconv_create(argv[0], argv[1], NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env, iconv_free, ICONV2VALUE(env.cd));
}
|
.list {|*aliases| ... } ⇒ Object
Iterates over each alias set.
896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 |
# File 'ext/iconv/iconv.c', line 896
/*
 * Iconv.list {|*aliases| ... }
 *
 * Enumerates the encodings known to the underlying iconv implementation.
 *
 * - With iconvlist() available: list_iconv (defined elsewhere) is invoked
 *   as the callback with a two-slot VALUE array.  Slot 1 is 0 when a block
 *   was given, otherwise a new Array that is returned afterwards.  Slot 0 is
 *   read back as a jump-tag state once iconvlist() returns; a non-zero
 *   state is re-raised with rb_jump_tag().
 * - With __iconv_get_list() available: the names are copied into an Array,
 *   which is returned when no block was given, or yielded element by
 *   element otherwise.
 * - Otherwise, or when a block was given: returns nil.
 */
static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /* Slot 0 is read unconditionally below.  Start it at 0 ("no pending
     * jump") so that the value is well-defined even if the callback never
     * stores into it, e.g. when iconvlist() reports no encodings at all. */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = (int)args[0];
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    /* A non-zero return from __iconv_get_list() is treated as "no list". */
    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* Copy the names into Ruby strings first so the C list can be freed
     * before any block code runs. */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
|
.open(to, from) {|iconv| ... } ⇒ Object
Equivalent to Iconv.new except that when it is called with a block, it yields the new instance, closes it afterwards, and returns the result of the block.
750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 |
# File 'ext/iconv/iconv.c', line 750
/*
 * Iconv.open(to, from, [options]) {|iconv| ... }
 *
 * Creates a conversion descriptor, wraps it in a new object of the
 * receiving class, and tags the object with the encoding index reported by
 * iconv_create() when that index is non-negative.
 *
 * Without a block the wrapped object is returned.  With a block the object
 * is yielded and iconv_finish() is run on it through rb_ensure(); the
 * block's value is returned.
 */
static VALUE
iconv_s_open(int argc, VALUE *argv, VALUE self)
{
    struct rb_iconv_opt_t conv_opt;
    VALUE to_name, from_name, opt_arg, handle;
    int enc_idx;

    rb_scan_args(argc, argv, "21", &to_name, &from_name, &opt_arg);
    get_iconv_opt(&conv_opt, opt_arg);

    handle = ICONV2VALUE(iconv_create(to_name, from_name, &conv_opt, &enc_idx));
    /* From here on `self' refers to the new instance, not the class. */
    self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)handle);
#ifdef HAVE_RUBY_ENCODING_H
    if (enc_idx >= 0) {
        ENCODING_SET(self, enc_idx);
    }
#endif

    if (!rb_block_given_p()) {
        return self;
    }
    return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
}
|
Instance Method Details
#close ⇒ Object
Finishes conversion.
After calling this, calling Iconv#iconv will cause an exception, but multiple calls of #close are guaranteed to end successfully.
Returns a string containing the byte sequence to change the output buffer to its initial shift state.
952 953 954 955 956 957 958 959 |
# File 'ext/iconv/iconv.c', line 952
/*
 * Iconv#close
 *
 * Returns nil when check_iconv() yields no descriptor for +self+.
 * Otherwise runs iconv_init_state() on +self+ and returns its result,
 * with iconv_free() applied to the descriptor through rb_ensure().
 */
static VALUE
iconv_finish(VALUE self)
{
    VALUE handle = check_iconv(self);

    if (handle) {
        return rb_ensure(iconv_init_state, self, iconv_free, handle);
    }
    return Qnil;
}
|
#conv(str...) ⇒ Object
Equivalent to
iconv(nil, str..., nil).join
1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 |
# File 'ext/iconv/iconv.c', line 1041
/*
 * Iconv#conv(str...)
 *
 * Converts a nil input first and uses its result as the accumulator.
 * When arguments are given, each one is converted in order and every
 * non-empty result is appended; a final nil input is then converted and
 * appended likewise.  Returns the accumulated String.
 */
static VALUE
iconv_conv(int argc, VALUE *argv, VALUE self)
{
    iconv_t cd = VALUE2ICONV(check_iconv(self));
    int toidx = ENCODING_GET(self);
    VALUE result, piece;
    int i;

    result = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (argc <= 0) {
        return result;
    }

    for (i = 0; i < argc; i++) {
        piece = iconv_convert(cd, argv[i], 0, -1, toidx, NULL);
        if (RSTRING_LEN(piece) != 0) {
            rb_str_buf_append(result, piece);
        }
    }

    /* Trailing nil input, converted after all the strings. */
    piece = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (RSTRING_LEN(piece) != 0) {
        rb_str_buf_append(result, piece);
    }
    return result;
}
|
#discard_ilseq=(flag) ⇒ Object
Sets discard_ilseq flag.
1145 1146 1147 1148 1149 1150 1151 |
# File 'ext/iconv/iconv.c', line 1145
/*
 * Iconv#discard_ilseq=(flag)
 *
 * Reduces +discard_ilseq+ to a C truth value and issues an
 * ICONV_SET_DISCARD_ILSEQ request through iconv_ctl.  Returns +self+.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    /* Kept in a named int: iconv_ctl is given the variable itself. */
    int enabled = RTEST(discard_ilseq) ? 1 : 0;

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enabled);
    return self;
}
|
#discard_ilseq? ⇒ Boolean
Returns discard_ilseq flag.
1126 1127 1128 1129 1130 1131 1132 1133 |
# File 'ext/iconv/iconv.c', line 1126
/*
 * Iconv#discard_ilseq?
 *
 * Issues an ICONV_GET_DISCARD_ILSEQ request through iconv_ctl and returns
 * true when the local flag is non-zero afterwards, false otherwise.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    /* NOTE(review): iconv_ctl is defined elsewhere and is expected to
     * store the answer into this variable -- confirm against its definition. */
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, enabled);
    return enabled ? Qtrue : Qfalse;
}
|
#iconv(str, start = 0, length = -1) ⇒ Object
Converts string and returns the result.
-
If str is a String, converts str[start, length] and returns the converted string.
-
If str is nil, places the converter itself into its initial shift state and just returns a string containing the byte sequence to change the output buffer to its initial shift state.
-
Otherwise, raises an exception.
Parameters
- str
-
string to be converted, or nil
- start
-
starting offset
- length
-
conversion length; nil or -1 means the whole string from start
Exceptions
-
IconvIllegalSequence
-
IconvInvalidCharacter
-
IconvOutOfRange
Examples
See the Iconv documentation.
988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 |
# File 'ext/iconv/iconv.c', line 988
/*
 * Iconv#iconv(str, start = 0, length = -1)
 *
 * Converts a portion of +str+ (or, when +str+ is nil, passes nil on to
 * iconv_convert()) and returns what iconv_convert() returns.
 *
 * Arguments after +str+:
 * - exactly one extra argument that rb_range_beg_len() accepts as a range
 *   supplies both start and length;
 * - otherwise the first is a start offset (negative values count from the
 *   end of the string) and the second a length, where nil means -1.
 *
 * When HAVE_RUBY_ENCODING_H is defined, positive start/length values are
 * character counts and are translated to byte offsets with rb_enc_nth()
 * before conversion; otherwise they are clamped to the string length.
 *
 * A nil +str+ combined with a positive start or length (for example
 * iconv(nil, 3) or iconv(nil, nil, 5)) is handled explicitly: there is no
 * string to index into, so both values are clamped to 0 instead of
 * dereferencing nil as a String.
 */
static VALUE
iconv_iconv(int argc, VALUE *argv, VALUE self)
{
    VALUE str, n1, n2;
    VALUE cd = check_iconv(self);
    long start = 0, length = 0, slen = 0;

    rb_scan_args(argc, argv, "12", &str, &n1, &n2);
    if (!NIL_P(str)) {
#ifdef HAVE_RUBY_ENCODING_H
        VALUE n = rb_str_length(StringValue(str));
        slen = NUM2LONG(n);
#else
        slen = RSTRING_LEN(StringValue(str));
#endif
    }

    /* Try the single-range form first; fall back to (start, length). */
    if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
        /* length is only taken from n2 when start lands inside the string
         * (or was omitted); otherwise it stays 0. */
        if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
            length = NIL_P(n2) ? -1 : NUM2LONG(n2);
        }
    }

    if (start > 0 || length > 0) {
#ifdef HAVE_RUBY_ENCODING_H
        if (NIL_P(str)) {
            /* nil has no bytes: RSTRING_PTR()/rb_enc_get() must not be
             * applied to it.  Clamp as the non-encoding branch does when
             * slen is 0. */
            if (start > 0) {
                start = 0;
            }
            if (length > 0) {
                length = 0;
            }
        }
        else {
            const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
            const char *ps = s;
            rb_encoding *enc = rb_enc_get(str);

            /* Turn character counts into byte offsets. */
            if (start > 0) {
                start = (ps = rb_enc_nth(s, e, start, enc)) - s;
            }
            if (length > 0) {
                length = rb_enc_nth(ps, e, length, enc) - ps;
            }
        }
#else
        if (start > slen) {
            start = slen;
        }
        if (length > slen - start) {
            length = slen - start;
        }
#endif
    }
    return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
}
|
#transliterate=(flag) ⇒ Object
Sets transliterate flag.
1108 1109 1110 1111 1112 1113 1114 |
# File 'ext/iconv/iconv.c', line 1108
/*
 * Iconv#transliterate=(flag)
 *
 * Reduces +transliterate+ to a C truth value and issues an
 * ICONV_SET_TRANSLITERATE request through iconv_ctl.  Returns +self+.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    /* Kept in a named int: iconv_ctl is given the variable itself. */
    int enabled = RTEST(transliterate) ? 1 : 0;

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enabled);
    return self;
}
|
#transliterate? ⇒ Boolean
Returns transliterate flag.
1089 1090 1091 1092 1093 1094 1095 1096 |
# File 'ext/iconv/iconv.c', line 1089
/*
 * Iconv#transliterate?
 *
 * Issues an ICONV_GET_TRANSLITERATE request through iconv_ctl and returns
 * true when the local flag is non-zero afterwards, false otherwise.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    /* NOTE(review): iconv_ctl is defined elsewhere and is expected to
     * store the answer into this variable -- confirm against its definition. */
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, enabled);
    return enabled ? Qtrue : Qfalse;
}
|
#trivial? ⇒ Boolean
Returns trivial flag.
1070 1071 1072 1073 1074 1075 1076 1077 |
# File 'ext/iconv/iconv.c', line 1070
/*
 * Iconv#trivial?
 *
 * Issues an ICONV_TRIVIALP request through iconv_ctl and returns true when
 * the local flag is non-zero afterwards, false otherwise.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    /* NOTE(review): iconv_ctl is defined elsewhere and is expected to
     * store the answer into this variable -- confirm against its definition. */
    int is_trivial = 0;

    iconv_ctl(self, ICONV_TRIVIALP, is_trivial);
    return is_trivial ? Qtrue : Qfalse;
}
|