Class: Iconv
- Inherits:
-
Data
- Object
- Data
- Iconv
- Defined in:
- ext/iconv/iconv.c,
lib/iconv.rb,
lib/iconv/version.rb,
ext/iconv/iconv.c
Overview
Summary
Ruby extension for charset conversion.
Abstract
Iconv is a wrapper class for the UNIX 95 iconv()
function family, which translates strings between various encoding systems.
See Open Group’s on-line documents for more details.
-
iconv.h
: www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html -
iconv_open()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html -
iconv()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv.html -
iconv_close()
: www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html
Which coding systems are available is platform-dependent.
Examples
-
Simple conversion between two charsets.
converted_text = Iconv.conv('iso-8859-15', 'utf-8', text)
-
Instantiate a new Iconv and use method Iconv#iconv.
cd = Iconv.new(to, from) begin input.each { |s| output << cd.iconv(s) } output << cd.iconv(nil) # Don't forget this! ensure cd.close end
-
Invoke Iconv.open with a block.
Iconv.open(to, from) do |cd| input.each { |s| output << cd.iconv(s) } output << cd.iconv(nil) end
-
Shorthand for (3).
Iconv.iconv(to, from, *input.to_a)
Attentions
Even if some implementation-dependent extensions are useful, DON’T USE those extensions in libraries and scripts that are to be widely distributed. If you want to use those features, use String#encode.
Defined Under Namespace
Modules: Failure Classes: BrokenLibrary, IllegalSequence, InvalidCharacter, InvalidEncoding, OutOfRange
Constant Summary collapse
- VERSION =
"1.0.3"
Class Method Summary collapse
-
.charset_map ⇒ Object
Returns the map from canonical name to system dependent name.
-
.conv(to, from, str) ⇒ Object
Shorthand for Iconv.iconv(to, from, str).join. See Iconv.iconv.
-
.ctlmethods ⇒ Array
Returns available iconvctl() method list.
-
.iconv(to, from, *strs) ⇒ Object
Shorthand for Iconv.open(to, from) { |cd| (strs + [nil]).collect { |s| cd.iconv(s) } }.
-
.list {|*aliases| ... } ⇒ Object
Iterates over each alias set.
-
.open(to, from) {|iconv| ... } ⇒ Object
Equivalent to Iconv.new except that when it is called with a block, it yields the new instance to the block, closes it afterwards, and returns the result returned from the block.
Instance Method Summary collapse
-
#close ⇒ Object
Finishes conversion.
-
#conv(str...) ⇒ Object
Equivalent to iconv(nil, str..., nil).join.
-
#discard_ilseq=(flag) ⇒ Object
Sets discard_ilseq flag.
-
#discard_ilseq? ⇒ Boolean
Returns discard_ilseq flag.
-
#iconv(str, start = 0, length = -1) ⇒ Object
Converts string and returns the result.
-
#new(to, from, [options]) ⇒ Object
constructor
Creates new code converter from a coding-system designated with
from
to another one designated with to
. -
#transliterate=(flag) ⇒ Object
Sets transliterate flag.
-
#transliterate? ⇒ Boolean
Returns transliterate flag.
-
#trivial? ⇒ Boolean
Returns trivial flag.
Constructor Details
#new(to, from, [options]) ⇒ Object
Creates new code converter from a coding-system designated with from
to another one designated with to
.
Parameters
to
-
encoding name for destination
from
-
encoding name for source
options
-
options for converter
Exceptions
- TypeError
-
if
to
or from
aren’t String - InvalidEncoding
-
if the designated converter couldn’t be found
- SystemCallError
-
if
iconv_open(3)
fails
721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 |
# File 'ext/iconv/iconv.c', line 721
/*
 * Iconv#new(to, from, [options])
 *
 * (Re)initialises +self+ with a converter from encoding +from+ to
 * encoding +to+.  Any descriptor +self+ already holds is released first.
 *
 * argc/argv - two mandatory arguments (to, from) and one optional
 *             argument (options), as described by the "21" scan format.
 * self      - the Iconv object being initialised.
 *
 * Returns self.
 */
static VALUE
iconv_initialize(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    VALUE handle;
    struct rb_iconv_opt_t opt;
    int idx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);

    /*
     * Drop the previous descriptor and clear the slot *before* creating
     * the new one, so that self never keeps pointing at a freed
     * descriptor should iconv_create() not return normally.
     */
    iconv_free(check_iconv(self));
    DATA_PTR(self) = NULL;

    handle = ICONV2VALUE(iconv_create(to, from, &opt, &idx));
    DATA_PTR(self) = (void *)handle;
#ifdef HAVE_RUBY_ENCODING_H
    /* A negative index means iconv_create() reported no encoding to tag. */
    if (idx >= 0) {
        ENCODING_SET(self, idx);
    }
#endif
    return self;
}
|
Class Method Details
.charset_map ⇒ Object
Returns the map from canonical name to system dependent name.
192 193 194 195 196 |
# File 'ext/iconv/iconv.c', line 192
/*
 * Iconv.charset_map
 *
 * Accessor for the file-level charset_map object, the map from
 * canonical encoding names to system-dependent names.
 *
 * Returns charset_map itself (not a copy).
 */
static VALUE
charset_map_get(void)
{
    VALUE map = charset_map;

    return map;
}
|
.conv(to, from, str) ⇒ Object
833 834 835 836 837 838 839 840 841 842 843 844 |
# File 'ext/iconv/iconv.c', line 833
/*
 * Iconv.conv(to, from, str)
 *
 * One-shot conversion of a single string from encoding +from+ to
 * encoding +to+.  The conversion environment is set up with exactly one
 * input (str) and an empty string as the accumulator, with
 * rb_str_append as the append function.
 *
 * The work itself is done by iconv_s_convert(); rb_ensure() pairs it
 * with iconv_free() on the freshly created descriptor.
 *
 * Returns whatever iconv_s_convert() returns.
 */
static VALUE
iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
{
    struct iconv_env_t env;

    /* Single input string, accumulated into a single output string. */
    env.argv = &str;
    env.argc = 1;
    env.append = rb_str_append;
    env.ret = rb_str_new(0, 0);

    /* Created last: everything above is plain setup. */
    env.cd = iconv_create(to, from, NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env,
                     iconv_free, ICONV2VALUE(env.cd));
}
|
.ctlmethods ⇒ Array
Returns available iconvctl() method list.
1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 |
# File 'ext/iconv/iconv.c', line 1159
/*
 * Iconv.ctlmethods
 *
 * Builds an Array of Symbols naming the iconvctl()-backed methods that
 * are available.  Each entry is included only when the corresponding
 * ICONV_* request macro is defined at compile time.
 *
 * klass - receiver; not used.
 *
 * Returns the new Array (empty when none of the macros is defined).
 */
static VALUE
iconv_s_ctlmethods(VALUE klass)
{
    VALUE methods = rb_ary_new();

/* Appends the Symbol for one control-method name to the result. */
#define PUSH_CTL_METHOD(name) rb_ary_push(methods, ID2SYM(rb_intern(name)))

#ifdef ICONV_TRIVIALP
    PUSH_CTL_METHOD("trivial?");
#endif
#ifdef ICONV_GET_TRANSLITERATE
    PUSH_CTL_METHOD("transliterate?");
#endif
#ifdef ICONV_SET_TRANSLITERATE
    PUSH_CTL_METHOD("transliterate=");
#endif
#ifdef ICONV_GET_DISCARD_ILSEQ
    PUSH_CTL_METHOD("discard_ilseq?");
#endif
#ifdef ICONV_SET_DISCARD_ILSEQ
    PUSH_CTL_METHOD("discard_ilseq=");
#endif

#undef PUSH_CTL_METHOD

    return methods;
}
|
.iconv(to, from, *strs) ⇒ Object
809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 |
# File 'ext/iconv/iconv.c', line 809
/*
 * Iconv.iconv(to, from, *strs)
 *
 * Converts every string in +strs+ from encoding +from+ to encoding +to+
 * and collects the results in an Array (rb_ary_push is the append
 * function handed to iconv_s_convert()).
 *
 * argc/argv - argv[0] is +to+, argv[1] is +from+, the rest are inputs.
 *
 * Raises ArgumentError when fewer than two arguments are given.
 * Returns whatever iconv_s_convert() returns; rb_ensure() pairs it with
 * iconv_free() on the freshly created descriptor.
 */
static VALUE
iconv_s_iconv(int argc, VALUE *argv, VALUE self)
{
    struct iconv_env_t env;
    int nstrs;

    /* needs `to' and `from' arguments at least */
    if (argc < 2) {
        rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
    }

    /* Everything after the two encoding names is input to convert. */
    nstrs = argc - 2;
    env.argc = nstrs;
    env.argv = argv + 2;
    env.append = rb_ary_push;
    env.ret = rb_ary_new2(nstrs);
    env.cd = iconv_create(argv[0], argv[1], NULL, &env.toidx);

    return rb_ensure(iconv_s_convert, (VALUE)&env,
                     iconv_free, ICONV2VALUE(env.cd));
}
|
.list {|*aliases| ... } ⇒ Object
Iterates over each alias set.
893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 |
# File 'ext/iconv/iconv.c', line 893
/*
 * Iconv.list {|*aliases| ... }
 *
 * Iterates over the alias sets known to the underlying iconv library.
 *
 * HAVE_ICONVLIST build:
 *   iconvlist() is run with list_iconv as the callback and +args+ as its
 *   data pointer.
 *     args[0] - read back afterwards as a jump-tag state; a non-zero
 *               value is re-raised with rb_jump_tag().
 *     args[1] - 0 when a block is given, otherwise a new Array, which is
 *               returned once listing has finished.
 *
 * HAVE___ICONV_FREE_LIST build:
 *   The native name list is copied into a Ruby Array and freed.  Without
 *   a block the Array is returned; with a block each name is yielded.
 *   If __iconv_get_list() reports failure, nil is returned.
 *
 * Returns the Array of results when no block is given and a listing
 * facility exists; nil otherwise.
 */
static VALUE
iconv_s_list(void)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    /*
     * Start from a well-defined "no error" state.  Nothing in this
     * function writes args[0] before it is read below; it is presumably
     * filled in by list_iconv (not shown here -- confirm).  Without this
     * initialisation an indeterminate value would be read if the
     * callback is never invoked or only stores part of the slot.
     */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = (int)args[0];
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    if (__iconv_get_list(&list, &sz)) return Qnil;

    /* Copy the names into Ruby strings, then release the native list. */
    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    for (i = 0; i < RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
|
.open(to, from) {|iconv| ... } ⇒ Object
Equivalent to Iconv.new except that when it is called with a block, it yields the new instance to the block, closes it afterwards, and returns the result returned from the block.
747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 |
# File 'ext/iconv/iconv.c', line 747
/*
 * Iconv.open(to, from) {|iconv| ... }
 *
 * Creates a converter from encoding +from+ to encoding +to+ and wraps
 * it in a new object of the receiving class.
 *
 * argc/argv - two mandatory arguments (to, from) and one optional
 *             argument (options), as described by the "21" scan format.
 * self      - on entry the receiver; reassigned to the wrapped instance.
 *
 * Without a block, returns the new instance.  With a block, yields the
 * instance and returns the block's result; rb_ensure() pairs the yield
 * with iconv_finish() on the instance.
 */
static VALUE
iconv_s_open(int argc, VALUE *argv, VALUE self)
{
    VALUE to, from, options;
    VALUE handle;
    struct rb_iconv_opt_t opt;
    int idx;

    rb_scan_args(argc, argv, "21", &to, &from, &options);
    get_iconv_opt(&opt, options);

    handle = ICONV2VALUE(iconv_create(to, from, &opt, &idx));

    /* From here on `self' refers to the newly wrapped instance. */
    self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)handle);
#ifdef HAVE_RUBY_ENCODING_H
    if (idx >= 0) {
        ENCODING_SET(self, idx);
    }
#endif

    if (!rb_block_given_p()) {
        return self;
    }
    return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
}
|
Instance Method Details
#close ⇒ Object
Finishes conversion.
After calling this, calling Iconv#iconv will cause an exception, but multiple calls of #close are guaranteed to end successfully.
Returns a string containing the byte sequence to change the output buffer to its initial shift state.
949 950 951 952 953 954 955 956 |
# File 'ext/iconv/iconv.c', line 949
/*
 * Iconv#close
 *
 * Finishes the conversion.  When check_iconv() yields a live descriptor,
 * iconv_init_state() is run on self and rb_ensure() pairs it with
 * iconv_free() on that descriptor; the result of iconv_init_state() is
 * returned.
 *
 * When check_iconv() yields 0 (presumably an already-closed converter)
 * nothing is done and nil is returned, which is what makes repeated
 * calls harmless.
 */
static VALUE
iconv_finish(VALUE self)
{
    VALUE handle = check_iconv(self);

    if (handle) {
        return rb_ensure(iconv_init_state, self, iconv_free, handle);
    }
    return Qnil;
}
|
#conv(str...) ⇒ Object
Equivalent to
iconv(nil, str..., nil).join
1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 |
# File 'ext/iconv/iconv.c', line 1038
/*
 * Iconv#conv(str...)
 *
 * Equivalent to iconv(nil, str..., nil).join: converts nil once, then
 * each argument in order, then nil once more, concatenating every
 * non-empty piece onto the result of the first conversion.
 *
 * argc/argv - the strings to convert; may be empty.
 *
 * Returns the accumulated string.  With no arguments only the leading
 * nil conversion is performed.
 */
static VALUE
iconv_conv(int argc, VALUE *argv, VALUE self)
{
    iconv_t cd = VALUE2ICONV(check_iconv(self));
    int toidx = ENCODING_GET(self);
    VALUE result, piece;
    int i;

    /* Leading nil conversion; its output seeds the result. */
    result = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (argc <= 0) {
        return result;
    }

    for (i = 0; i < argc; i++) {
        piece = iconv_convert(cd, argv[i], 0, -1, toidx, NULL);
        if (RSTRING_LEN(piece)) {
            rb_str_buf_append(result, piece);
        }
    }

    /* Trailing nil conversion after the last input. */
    piece = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
    if (RSTRING_LEN(piece)) {
        rb_str_buf_append(result, piece);
    }
    return result;
}
|
#discard_ilseq=(flag) ⇒ Object
Sets discard_ilseq flag.
1142 1143 1144 1145 1146 1147 1148 |
# File 'ext/iconv/iconv.c', line 1142
/*
 * Iconv#discard_ilseq=(flag)
 *
 * Issues the ICONV_SET_DISCARD_ILSEQ control request with the
 * truthiness of +discard_ilseq+ as its argument.
 *
 * Returns self.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    int enabled = RTEST(discard_ilseq);

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enabled);

    return self;
}
|
#discard_ilseq? ⇒ Boolean
Returns discard_ilseq flag.
1123 1124 1125 1126 1127 1128 1129 1130 |
# File 'ext/iconv/iconv.c', line 1123
/*
 * Iconv#discard_ilseq?
 *
 * Issues the ICONV_GET_DISCARD_ILSEQ control request; iconv_ctl() is
 * handed the local flag variable and is expected to update it.
 *
 * Returns true when the flag ends up non-zero, false otherwise.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, flag);

    return flag ? Qtrue : Qfalse;
}
|
#iconv(str, start = 0, length = -1) ⇒ Object
Converts string and returns the result.
-
If
str
is a String, converts str[start, length]
and returns the converted string. -
If
str
is nil
, places converter itself into initial shift state and just returns a string containing the byte sequence to change the output buffer to its initial shift state. -
Otherwise, raises an exception.
Parameters
- str
-
string to be converted, or nil
- start
-
starting offset
- length
-
conversion length; nil or -1 means the whole string from start
Exceptions
-
IconvIllegalSequence
-
IconvInvalidCharacter
-
IconvOutOfRange
Examples
See the Iconv documentation.
985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 |
# File 'ext/iconv/iconv.c', line 985
/*
 * Iconv#iconv(str, start = 0, length = -1)
 *
 * Converts str (or the part of it selected by start/length) and returns
 * the result of iconv_convert().  str may be nil.
 *
 * argc/argv - one mandatory argument (str) and two optional ones
 *             (n1, n2), as described by the "12" scan format.
 *
 * The body first normalises start/length and then delegates to
 * iconv_convert().
 */
static VALUE
iconv_iconv(int argc, VALUE *argv, VALUE self)
{
VALUE str, n1, n2;
VALUE cd = check_iconv(self);
long start = 0, length = 0, slen = 0;
rb_scan_args(argc, argv, "12", &str, &n1, &n2);
/* slen stays 0 for a nil str; otherwise it is the length of str. */
if (!NIL_P(str)) {
#ifdef HAVE_RUBY_ENCODING_H
/* Length as reported by rb_str_length() (not RSTRING_LEN). */
VALUE n = rb_str_length(StringValue(str));
slen = NUM2LONG(n);
#else
slen = RSTRING_LEN(StringValue(str));
#endif
}
/*
 * With exactly two arguments, n1 is first offered to rb_range_beg_len(),
 * which fills start/length when it returns a true value.  In every other
 * case n1/n2 are treated as a numeric start and length.
 */
if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
/*
 * length is only taken from n2 (or defaulted to -1) when n1 is nil, or
 * when start lies inside the string: a negative start is shifted by slen
 * and must then be >= 0, a non-negative start must be < slen.  Otherwise
 * length keeps its initial value of 0.  Note that start is assigned as a
 * side effect of evaluating this condition.
 */
if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
length = NIL_P(n2) ? -1 : NUM2LONG(n2);
}
}
/* Only positive values need adjusting; 0 and negative values pass through. */
if (start > 0 || length > 0) {
#ifdef HAVE_RUBY_ENCODING_H
/*
 * NOTE(review): str is not re-checked for nil here.  A nil str combined
 * with a positive start looks like it would reach RSTRING_PTR(nil) --
 * confirm whether that combination is reachable.
 */
const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
const char *ps = s;
rb_encoding *enc = rb_enc_get(str);
/* Replace start with the pointer distance from s to the position rb_enc_nth() finds. */
if (start > 0) {
start = (ps = rb_enc_nth(s, e, start, enc)) - s;
}
/* Likewise for length, measured from the (possibly advanced) ps. */
if (length > 0) {
length = rb_enc_nth(ps, e, length, enc) - ps;
}
#else
/* Clamp start and length so that they do not run past slen. */
if (start > slen) {
start = slen;
}
if (length > slen - start) {
length = slen - start;
}
#endif
}
return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
}
|
#transliterate=(flag) ⇒ Object
Sets transliterate flag.
1105 1106 1107 1108 1109 1110 1111 |
# File 'ext/iconv/iconv.c', line 1105
/*
 * Iconv#transliterate=(flag)
 *
 * Issues the ICONV_SET_TRANSLITERATE control request with the
 * truthiness of +transliterate+ as its argument.
 *
 * Returns self.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    int enabled = RTEST(transliterate);

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enabled);

    return self;
}
|
#transliterate? ⇒ Boolean
Returns transliterate flag.
1086 1087 1088 1089 1090 1091 1092 1093 |
# File 'ext/iconv/iconv.c', line 1086
/*
 * Iconv#transliterate?
 *
 * Issues the ICONV_GET_TRANSLITERATE control request; iconv_ctl() is
 * handed the local flag variable and is expected to update it.
 *
 * Returns true when the flag ends up non-zero, false otherwise.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, flag);

    return flag ? Qtrue : Qfalse;
}
|
#trivial? ⇒ Boolean
Returns trivial flag.
1067 1068 1069 1070 1071 1072 1073 1074 |
# File 'ext/iconv/iconv.c', line 1067
/*
 * Iconv#trivial?
 *
 * Issues the ICONV_TRIVIALP control request; iconv_ctl() is handed the
 * local flag variable and is expected to update it.
 *
 * Returns true when the flag ends up non-zero, false otherwise.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int flag = 0;

    iconv_ctl(self, ICONV_TRIVIALP, flag);

    return flag ? Qtrue : Qfalse;
}
|