Class: Encoding
Defined Under Namespace
Classes: CompatibilityError, Converter, ConverterNotFoundError, InvalidByteSequenceError, UndefinedConversionError
Class Method Summary (collapse)
-
+ (Object) _load
:nodoc:.
-
+ (Object) aliases
Returns the hash of available encoding alias and original encoding name.
-
+ (nil) compatible?(str1, str2)
Checks the compatibility of two strings.
-
+ (Object) default_external
Returns default external encoding.
-
+ (Object) default_external=(enc)
Sets default external encoding.
-
+ (Object) default_internal
Returns default internal encoding.
-
+ (Object) default_internal=(enc)
Sets default internal encoding.
-
+ (Object) find
Search the encoding with specified name.
-
+ (Array) list
Returns the list of loaded encodings.
-
+ (String) locale_charmap
Returns the locale charmap name.
-
+ (Array) name_list
Returns the list of available encoding names.
Instance Method Summary (collapse)
-
- (Object) _dump
:nodoc:.
-
- (Boolean) ascii_compatible?
Returns whether ASCII-compatible or not.
-
- (Boolean) dummy?
Returns true for dummy encodings.
-
- (String) inspect
Returns a string which represents the encoding for programmers.
-
- (String) name
Returns the name of the encoding.
-
- (Array) names
Returns the list of name and aliases of the encoding.
-
- (Encoding) replicate(name)
Returns a replicated encoding of enc whose name is name.
-
- (String) name
Returns the name of the encoding.
Class Method Details
+ (Object) _load
:nodoc:
|
|
# File 'encoding.c'
/* :nodoc: */
static VALUE
enc_load(VALUE klass, VALUE str)
{
    /* Loading is a plain lookup: pass the receiver and argument to enc_find. */
    VALUE found = enc_find(klass, str);

    return found;
}
|
+ (Object) aliases
Returns the hash of available encoding alias and original encoding name.
Encoding.aliases
#=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
"SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
 *
 * Returns a hash that maps each available encoding alias to the name of
 * its original encoding.
 *
 *   Encoding.aliases
 *   #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
 *        "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
 *
 */
static VALUE
rb_enc_aliases(VALUE klass)
{
    /*
     * Two-slot context shared with the per-entry callback: slot 0 is the
     * hash that gets returned, slot 1 is an array handed to
     * rb_enc_aliases_enc_i (its use is defined by that callback).
     */
    VALUE ctx[2];

    ctx[0] = rb_hash_new();
    ctx[1] = rb_ary_new();

    /* Run the callback over the encoding name table. */
    st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)ctx);

    return ctx[0];
}
|
+ (nil) compatible?(str1, str2)
Checks the compatibility of two strings. If they are compatible, that is, if they can be concatenated, returns the encoding that the concatenated string would have. If they are not compatible, nil is returned.
Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
#=> #<Encoding:ISO-8859-1>
Encoding.compatible?(
"\xa1".force_encoding("iso-8859-1"),
"\xa1\xa1".force_encoding("euc-jp"))
#=> nil
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   Encoding.compatible?(str1, str2) -> enc or nil
 *
 * Checks the compatibility of two strings.
 * If they are compatible, that is, if they can be concatenated,
 * returns the encoding that the concatenated string would have.
 * If they are not compatible, nil is returned.
 *
 *   Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
 *   #=> #<Encoding:ISO-8859-1>
 *
 *   Encoding.compatible?(
 *     "\xa1".force_encoding("iso-8859-1"),
 *     "\xa1\xa1".force_encoding("euc-jp"))
 *   #=> nil
 *
 */
static VALUE
enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
{
    rb_encoding *common;

    /* An argument that fails the enc_capable check yields nil right away. */
    if (!enc_capable(str1) || !enc_capable(str2)) {
        return Qnil;
    }

    /* A null result from rb_enc_compatible is reported as nil. */
    common = rb_enc_compatible(str1, str2);
    return common ? rb_enc_from_encoding(common) : Qnil;
}
|
+ (Object) default_external
Returns default external encoding.
It is initialized by the locale or -E option.
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   Encoding.default_external -> enc
 *
 * Returns the default external encoding.
 *
 * It is initialized by the locale or the -E option.
 */
static VALUE
get_default_external(VALUE klass)
{
    /* The receiver is unused; the value comes straight from the accessor. */
    VALUE current = rb_enc_default_external();

    return current;
}
|
+ (Object) default_external=(enc)
Sets default external encoding.
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   Encoding.default_external = enc
 *
 * Sets the default external encoding.
 */
static VALUE
set_default_external(VALUE klass, VALUE encoding)
{
    /* Issue the warning first, then apply the new setting. */
    rb_warning("setting Encoding.default_external");
    rb_enc_set_default_external(encoding);

    /* The argument is handed back unchanged. */
    return encoding;
}
|
+ (Object) default_internal
Returns default internal encoding.
It is initialized by the source internal_encoding or -E option.
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   Encoding.default_internal -> enc
 *
 * Returns the default internal encoding.
 *
 * It is initialized by the source internal_encoding or the -E option.
 */
static VALUE
get_default_internal(VALUE klass)
{
    /* The receiver is unused; the value comes straight from the accessor. */
    VALUE current = rb_enc_default_internal();

    return current;
}
|
+ (Object) default_internal=(enc)
Sets default internal encoding. Or removes default internal encoding when passed nil.
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   Encoding.default_internal = enc or nil
 *
 * Sets the default internal encoding,
 * or removes the default internal encoding when passed nil.
 */
static VALUE
set_default_internal(VALUE klass, VALUE encoding)
{
    /* Issue the warning first, then apply the new setting. */
    rb_warning("setting Encoding.default_internal");
    rb_enc_set_default_internal(encoding);

    /* The argument is handed back unchanged. */
    return encoding;
}
|
+ (Object) find(string) + (Object) find(symbol)
Searches for the encoding with the specified name. name should be a string or a symbol.
Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
Encoding.find(:Shift_JIS) #=> #<Encoding:Shift_JIS>
The names this method accepts are encoding names and aliases, including the following special aliases:
"external" |
default external encoding |
"internal" |
default internal encoding |
"locale" |
locale encoding |
"filesystem" |
filesystem encoding |
An ArgumentError is raised when there is no encoding with the given name. The only exception is Encoding.find("internal"), which returns nil when there is no encoding named "internal", in other words, when Ruby has no default internal encoding.
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   Encoding.find(string) -> enc
 *   Encoding.find(symbol) -> enc
 *
 * Searches for the encoding with the specified <i>name</i>.
 * <i>name</i> should be a string or a symbol.
 *
 *   Encoding.find("US-ASCII")  #=> #<Encoding:US-ASCII>
 *   Encoding.find(:Shift_JIS)  #=> #<Encoding:Shift_JIS>
 *
 * The names this method accepts are encoding names and aliases,
 * including the following special aliases:
 *
 * "external"::   default external encoding
 * "internal"::   default internal encoding
 * "locale"::     locale encoding
 * "filesystem":: filesystem encoding
 *
 * An ArgumentError is raised when there is no encoding with <i>name</i>.
 * The only exception is <code>Encoding.find("internal")</code>, which
 * returns nil when there is no encoding named "internal", in other words,
 * when Ruby has no default internal encoding.
 */
static VALUE
enc_find(VALUE klass, VALUE enc)
{
    /* Resolve the argument with to_encoding, then wrap it as an object. */
    VALUE found = rb_enc_from_encoding(to_encoding(enc));

    return found;
}
|
+ (Array) list
Returns the list of loaded encodings.
Encoding.list
#=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
#<Encoding:ISO-2022-JP (dummy)>]
Encoding.find("US-ASCII")
#=> #<Encoding:US-ASCII>
Encoding.list
#=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
#<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   Encoding.list -> [enc1, enc2, ...]
 *
 * Returns the list of loaded encodings.
 *
 *   Encoding.list
 *   #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
 *         #<Encoding:ISO-2022-JP (dummy)>]
 *
 *   Encoding.find("US-ASCII")
 *   #=> #<Encoding:US-ASCII>
 *
 *   Encoding.list
 *   #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
 *         #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
 *
 */
static VALUE
enc_list(VALUE klass)
{
    VALUE copy;

    /*
     * Fill a freshly created array from rb_encoding_list instead of
     * returning rb_encoding_list itself.
     */
    copy = rb_ary_new2(0);
    rb_ary_replace(copy, rb_encoding_list);

    return copy;
}
|
+ (String) locale_charmap
Returns the locale charmap name.
Debian GNU/Linux
LANG=C
Encoding.locale_charmap #=> "ANSI_X3.4-1968"
LANG=ja_JP.EUC-JP
Encoding.locale_charmap #=> "EUC-JP"
SunOS 5
LANG=C
Encoding.locale_charmap #=> "646"
LANG=ja
Encoding.locale_charmap #=> "eucJP"
The result is highly platform dependent. So Encoding.find(Encoding.locale_charmap) may cause an error. If you need some encoding object even for unknown locale, Encoding.find("locale") can be used.
|
|
# File 'encoding.c'
/*
* call-seq:
* Encoding.locale_charmap -> string
*
* Returns the locale charmap name.
*
* Debian GNU/Linux
* LANG=C
* Encoding.locale_charmap #=> "ANSI_X3.4-1968"
* LANG=ja_JP.EUC-JP
* Encoding.locale_charmap #=> "EUC-JP"
*
* SunOS 5
* LANG=C
* Encoding.locale_charmap #=> "646"
* LANG=ja
* Encoding.locale_charmap #=> "eucJP"
*
* The result is highly platform dependent.
* So Encoding.find(Encoding.locale_charmap) may cause an error.
* If you need some encoding object even for an unknown locale,
* Encoding.find("locale") can be used.
*
* On builds where none of the charmap sources below is compiled in,
* nil is returned instead of a string.
*
*/
VALUE
rb_locale_charmap(VALUE klass)
{
/* Exactly one of the following bodies is selected at compile time. */
#if defined NO_LOCALE_CHARMAP
/* With NO_LOCALE_CHARMAP defined, a fixed name is always returned. */
return rb_usascii_str_new2("ASCII-8BIT");
#elif defined _WIN32 || defined __CYGWIN__
/* Local prototype for the helper that supplies the codeset name. */
const char *nl_langinfo_codeset(void);
const char *codeset = nl_langinfo_codeset();
/* Scratch buffer for the "CP<number>" fallback below; presumably sized
* so that any int fits in decimal after the "CP" prefix -- TODO confirm. */
char cp[sizeof(int) * 3 + 4];
if (!codeset) {
/* The helper returned null: build a name from the console code page. */
snprintf(cp, sizeof(cp), "CP%d", GetConsoleCP());
codeset = cp;
}
return rb_usascii_str_new2(codeset);
#elif defined HAVE_LANGINFO_H
char *codeset;
/* Query the codeset name through nl_langinfo. */
codeset = nl_langinfo(CODESET);
return rb_usascii_str_new2(codeset);
#else
/* No charmap source in this configuration: the result is nil. */
return Qnil;
#endif
}
|
+ (Array) name_list
Returns the list of available encoding names.
Encoding.name_list
#=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
"ISO-8859-1", "Shift_JIS", "EUC-JP",
"Windows-31J",
"BINARY", "CP932", "eucJP"]
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   Encoding.name_list -> ["enc1", "enc2", ...]
 *
 * Returns the list of available encoding names.
 *
 *   Encoding.name_list
 *   #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
 *         "ISO-8859-1", "Shift_JIS", "EUC-JP",
 *         "Windows-31J",
 *         "BINARY", "CP932", "eucJP"]
 *
 */
static VALUE
rb_enc_name_list(VALUE klass)
{
    VALUE names;

    /* The array is created with the name table's entry count as its size argument. */
    names = rb_ary_new2(enc_table.names->num_entries);

    /* rb_enc_name_list_i is run over the name table with the array as its argument. */
    st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)names);

    return names;
}
|
Instance Method Details
- (Object) _dump
:nodoc:
|
|
# File 'encoding.c'
/* :nodoc: */
static VALUE
enc_dump(int argc, VALUE *argv, VALUE self)
{
    VALUE dumped;

    /*
     * Check the arguments against the "01" spec; no destination variable
     * is supplied (the last argument is 0).
     */
    rb_scan_args(argc, argv, "01", 0);

    /* The dumped form is whatever enc_name returns for this encoding. */
    dumped = enc_name(self);
    return dumped;
}
|
- (Boolean) ascii_compatible?
Returns whether ASCII-compatible or not.
Encoding::UTF_8.ascii_compatible? #=> true
Encoding::UTF_16BE.ascii_compatible? #=> false
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   enc.ascii_compatible? -> true or false
 *
 * Returns whether the encoding is ASCII-compatible or not.
 *
 *   Encoding::UTF_8.ascii_compatible?     #=> true
 *   Encoding::UTF_16BE.ascii_compatible?  #=> false
 *
 */
static VALUE
enc_ascii_compatible_p(VALUE enc)
{
    /*
     * must_encoding(enc) supplies the index into the encoding table; the
     * entry found there is what rb_enc_asciicompat examines.
     */
    if (rb_enc_asciicompat(enc_table.list[must_encoding(enc)].enc)) {
        return Qtrue;
    }
    return Qfalse;
}
|
- (Boolean) dummy?
Returns true for dummy encodings. A dummy encoding is an encoding for which character handling is not properly implemented. It is used for stateful encodings.
Encoding::ISO_2022_JP.dummy? #=> true
Encoding::UTF_8.dummy? #=> false
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   enc.dummy? -> true or false
 *
 * Returns true for dummy encodings.
 * A dummy encoding is an encoding for which character handling is not
 * properly implemented.
 * It is used for stateful encodings.
 *
 *   Encoding::ISO_2022_JP.dummy?       #=> true
 *   Encoding::UTF_8.dummy?             #=> false
 *
 */
static VALUE
enc_dummy_p(VALUE enc)
{
    /*
     * must_encoding(enc) supplies the index into the encoding table; the
     * entry found there is what ENC_DUMMY_P examines.
     */
    if (ENC_DUMMY_P(enc_table.list[must_encoding(enc)].enc)) {
        return Qtrue;
    }
    return Qfalse;
}
|
- (String) inspect
Returns a string which represents the encoding for programmers.
Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   enc.inspect -> string
 *
 * Returns a string that represents the encoding for programmers.
 *
 *   Encoding::UTF_8.inspect       #=> "#<Encoding:UTF-8>"
 *   Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
 */
static VALUE
enc_inspect(VALUE self)
{
    const char *dummy_mark;
    VALUE text;

    /*
     * NOTE(review): the VALUE returned by enc_dummy_p is tested directly
     * as a C truth value, which presumably relies on Qfalse being zero --
     * confirm; RTEST() would make that explicit.
     */
    dummy_mark = enc_dummy_p(self) ? " (dummy)" : "";

    /* Layout: #<ClassName:encoding-name[ (dummy)]> */
    text = rb_sprintf("#<%s:%s%s>",
                      rb_obj_classname(self),
                      rb_enc_name((rb_encoding *)DATA_PTR(self)),
                      dummy_mark);

    /* Tag the result with the US-ASCII encoding index and the 7-bit code range. */
    ENCODING_CODERANGE_SET(text, rb_usascii_encindex(), ENC_CODERANGE_7BIT);

    return text;
}
|
- (String) name
Returns the name of the encoding.
Encoding::UTF_8.name #=> "UTF-8"
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   enc.name -> string
 *
 * Returns the name of the encoding.
 *
 *   Encoding::UTF_8.name      #=> "UTF-8"
 */
static VALUE
enc_name(VALUE self)
{
    /* The object's data pointer is treated as its rb_encoding. */
    rb_encoding *enc = (rb_encoding *)DATA_PTR(self);
    const char *label = rb_enc_name(enc);

    /* The name is returned as a new string built by rb_usascii_str_new2. */
    return rb_usascii_str_new2(label);
}
|
- (Array) names
Returns the list of name and aliases of the encoding.
Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   enc.names -> array
 *
 * Returns the list of the name and aliases of the encoding.
 *
 *   Encoding::WINDOWS_31J.names  #=> ["Windows-31J", "CP932", "csWindows31J"]
 */
static VALUE
enc_names(VALUE self)
{
    /*
     * Two-slot context shared with enc_names_i: slot 0 carries this
     * encoding's index (cast to VALUE), slot 1 is the array that is
     * returned after the name table has been traversed.
     */
    VALUE ctx[2];

    ctx[0] = (VALUE)rb_to_encoding_index(self);
    ctx[1] = rb_ary_new2(0);

    st_foreach(enc_table.names, enc_names_i, (st_data_t)ctx);

    return ctx[1];
}
|
- (Encoding) replicate(name)
Returns a replicated encoding of enc whose name is name. The new encoding should have the same byte structure as enc. If name is already used by another encoding, an ArgumentError is raised.
|
|
# File 'encoding.c'
/*
* call-seq:
* enc.replicate(name) -> encoding
*
* Returns a replicated encoding of _enc_ whose name is _name_.
* The new encoding should have the same byte structure as _enc_.
* If _name_ is already used by another encoding, an ArgumentError is raised.
*
*/
static VALUE
enc_replicate(VALUE encoding, VALUE name)
{
/*
* name is converted with StringValueCStr and the receiver is resolved
* with rb_to_encoding; both results go to rb_enc_replicate, and whatever
* it returns is turned into the return value by
* rb_enc_from_encoding_index.
*/
return rb_enc_from_encoding_index(
rb_enc_replicate(StringValueCStr(name),
rb_to_encoding(encoding)));
}
|
- (String) name
Returns the name of the encoding.
Encoding::UTF_8.name #=> "UTF-8"
|
|
# File 'encoding.c'
/*
 * call-seq:
 *   enc.name -> string
 *
 * Returns the name of the encoding.
 *
 *   Encoding::UTF_8.name      #=> "UTF-8"
 */
static VALUE
enc_name(VALUE self)
{
    /* The object's data pointer is treated as its rb_encoding. */
    rb_encoding *enc = (rb_encoding *)DATA_PTR(self);

    /* The name is returned as a new string built by rb_usascii_str_new2. */
    return rb_usascii_str_new2(rb_enc_name(enc));
}
|