Class: String

Inherits:
Object show all
Includes:
Comparable
Defined in:
string.c

Overview

A String object holds and manipulates an arbitrary sequence of bytes, typically representing characters. String objects may be created using String::new or as literals.

Because of aliasing issues, users of strings should be aware of the methods that modify the contents of a String object. Typically, methods with names ending in “!'' modify their receiver, while those without a “!'' return a new String. However, there are exceptions, such as String#[]=.

Direct Known Subclasses

Warning::buffer

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Comparable

#<, #<=, #>, #>=, #between?, #clamp

Constructor Details

#new(str = "") ⇒ String #new(str = "", encoding: enc) ⇒ String #new(str = "", capacity: size) ⇒ String

Returns a new string object containing a copy of str.

The optional encoding keyword argument specifies the encoding of the new string. If not specified, the encoding of str is used (or ASCII-8BIT, if str is not specified).

The optional capacity keyword argument specifies the size of the internal buffer. This may improve performance, when the string will be concatenated many times (causing many realloc calls).


1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
# File 'string.c', line 1562

static VALUE
rb_str_init(int argc, VALUE *argv, VALUE str)
{
    static ID keyword_ids[2];
    VALUE orig, opt, venc, vcapa;
    VALUE kwargs[2];
    rb_encoding *enc = 0;
    int n;

    if (!keyword_ids[0]) {
  keyword_ids[0] = rb_id_encoding();
  CONST_ID(keyword_ids[1], "capacity");
    }

    n = rb_scan_args(argc, argv, "01:", &orig, &opt);
    if (!NIL_P(opt)) {
  rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
  venc = kwargs[0];
  vcapa = kwargs[1];
  if (venc != Qundef && !NIL_P(venc)) {
      enc = rb_to_encoding(venc);
  }
  if (vcapa != Qundef && !NIL_P(vcapa)) {
      long capa = NUM2LONG(vcapa);
      long len = 0;
      int termlen = enc ? rb_enc_mbminlen(enc) : 1;

      if (capa < STR_BUF_MIN_SIZE) {
    capa = STR_BUF_MIN_SIZE;
      }
      if (n == 1) {
    StringValue(orig);
    len = RSTRING_LEN(orig);
    if (capa < len) {
        capa = len;
    }
    if (orig == str) n = 0;
      }
      str_modifiable(str);
      if (STR_EMBED_P(str)) { /* make noembed always */
                char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
                memcpy(new_ptr, RSTRING(str)->as.ary, RSTRING_EMBED_LEN_MAX + 1);
                RSTRING(str)->as.heap.ptr = new_ptr;
            }
            else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
                const size_t size = (size_t)capa + termlen;
                const char *const old_ptr = RSTRING_PTR(str);
                const size_t osize = RSTRING(str)->as.heap.len + TERM_LEN(str);
                char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
                memcpy(new_ptr, old_ptr, osize < size ? osize : size);
                FL_UNSET_RAW(str, STR_SHARED);
                RSTRING(str)->as.heap.ptr = new_ptr;
      }
      else if (STR_HEAP_SIZE(str) != (size_t)capa + termlen) {
    SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char,
      (size_t)capa + termlen, STR_HEAP_SIZE(str));
      }
      RSTRING(str)->as.heap.len = len;
      TERM_FILL(&RSTRING(str)->as.heap.ptr[len], termlen);
      if (n == 1) {
    memcpy(RSTRING(str)->as.heap.ptr, RSTRING_PTR(orig), len);
    rb_enc_cr_str_exact_copy(str, orig);
      }
      FL_SET(str, STR_NOEMBED);
      RSTRING(str)->as.heap.aux.capa = capa;
  }
  else if (n == 1) {
      rb_str_replace(str, orig);
  }
  if (enc) {
      rb_enc_associate(str, enc);
      ENC_CODERANGE_CLEAR(str);
  }
    }
    else if (n == 1) {
  rb_str_replace(str, orig);
    }
    return str;
}

Class Method Details

.try_convert(obj) ⇒ String?

Try to convert obj into a String, using to_str method. Returns converted string or nil if obj cannot be converted for any reason.

String.try_convert("str")     #=> "str"
String.try_convert(/re/)      #=> nil

2346
2347
2348
2349
2350
# File 'string.c', line 2346

static VALUE
rb_str_s_try_convert(VALUE dummy, VALUE str)
{
    return rb_check_string_type(str);
}

Instance Method Details

#%(arg) ⇒ String

Format—Uses str as a format specification, and returns the result of applying it to arg. If the format specification contains more than one substitution, then arg must be an Array or Hash containing the values to be substituted. See Kernel#sprintf for details of the format string.

"%05d" % 123                              #=> "00123"
"%-5s: %016x" % [ "ID", self.object_id ]  #=> "ID   : 00002b054ec93168"
"foo = %{foo}" % { :foo => 'bar' }        #=> "foo = bar"

2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
# File 'string.c', line 2050

static VALUE
rb_str_format_m(VALUE str, VALUE arg)
{
    VALUE tmp = rb_check_array_type(arg);

    if (!NIL_P(tmp)) {
        return rb_str_format(RARRAY_LENINT(tmp), RARRAY_CONST_PTR(tmp), str);
    }
    return rb_str_format(1, &arg, str);
}

#*(integer) ⇒ String

Copy — Returns a new String containing integer copies of the receiver. integer must be greater than or equal to 0.

"Ho! " * 3   #=> "Ho! Ho! Ho! "
"Ho! " * 0   #=> ""

1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
# File 'string.c', line 1980

VALUE
rb_str_times(VALUE str, VALUE times)
{
    VALUE str2;
    long n, len;
    char *ptr2;
    int termlen;

    if (times == INT2FIX(1)) {
  return rb_str_dup(str);
    }
    if (times == INT2FIX(0)) {
  str2 = str_alloc(rb_obj_class(str));
  rb_enc_copy(str2, str);
  return str2;
    }
    len = NUM2LONG(times);
    if (len < 0) {
  rb_raise(rb_eArgError, "negative argument");
    }
    if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
       str2 = str_alloc(rb_obj_class(str));
       if (!STR_EMBEDDABLE_P(len, 1)) {
           RSTRING(str2)->as.heap.aux.capa = len;
           RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1);
           STR_SET_NOEMBED(str2);
       }
       STR_SET_LEN(str2, len);
       rb_enc_copy(str2, str);
       return str2;
    }
    if (len && LONG_MAX/len <  RSTRING_LEN(str)) {
  rb_raise(rb_eArgError, "argument too big");
    }

    len *= RSTRING_LEN(str);
    termlen = TERM_LEN(str);
    str2 = str_new0(rb_obj_class(str), 0, len, termlen);
    ptr2 = RSTRING_PTR(str2);
    if (len) {
        n = RSTRING_LEN(str);
        memcpy(ptr2, RSTRING_PTR(str), n);
        while (n <= len/2) {
            memcpy(ptr2 + n, ptr2, n);
            n *= 2;
        }
        memcpy(ptr2 + n, ptr2, len-n);
    }
    STR_SET_LEN(str2, len);
    TERM_FILL(&ptr2[len], termlen);
    rb_enc_cr_str_copy_for_substr(str2, str);

    return str2;
}

#+(other_str) ⇒ String

Concatenation—Returns a new String containing other_str concatenated to str.

"Hello from " + self.to_s   #=> "Hello from main"

1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
# File 'string.c', line 1908

VALUE
rb_str_plus(VALUE str1, VALUE str2)
{
    VALUE str3;
    rb_encoding *enc;
    char *ptr1, *ptr2, *ptr3;
    long len1, len2;
    int termlen;

    StringValue(str2);
    enc = rb_enc_check_str(str1, str2);
    RSTRING_GETMEM(str1, ptr1, len1);
    RSTRING_GETMEM(str2, ptr2, len2);
    termlen = rb_enc_mbminlen(enc);
    if (len1 > LONG_MAX - len2) {
  rb_raise(rb_eArgError, "string size too big");
    }
    str3 = str_new0(rb_cString, 0, len1+len2, termlen);
    ptr3 = RSTRING_PTR(str3);
    memcpy(ptr3, ptr1, len1);
    memcpy(ptr3+len1, ptr2, len2);
    TERM_FILL(&ptr3[len1+len2], termlen);

    ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc),
         ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2)));
    RB_GC_GUARD(str1);
    RB_GC_GUARD(str2);
    return str3;
}

#+Object

If the string is frozen, then return duplicated mutable string.

If the string is not frozen, then return the string itself.


2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
# File 'string.c', line 2647

static VALUE
str_uplus(VALUE str)
{
    if (OBJ_FROZEN(str)) {
  return rb_str_dup(str);
    }
    else {
  return str;
    }
}

#-Object

Returns a frozen, possibly pre-existing copy of the string.

The string will be deduplicated as long as it does not have any instance variables set on it.


2667
2668
2669
2670
2671
2672
2673
2674
# File 'string.c', line 2667

static VALUE
str_uminus(VALUE str)
{
    if (!BARE_STRING_P(str) && !rb_obj_frozen_p(str)) {
        str = rb_str_dup(str);
    }
    return rb_fstring(str);
}

#<<(obj) ⇒ String #<<(integer) ⇒ String

Appends the given object to str. If the object is an Integer, it is considered a codepoint and converted to a character before being appended.

a = "hello "
a << "world"   #=> "hello world"
a << 33        #=> "hello world!"

See also String#concat, which takes multiple arguments.


3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
# File 'string.c', line 3079

VALUE
rb_str_concat(VALUE str1, VALUE str2)
{
    unsigned int code;
    rb_encoding *enc = STR_ENC_GET(str1);
    int encidx;

    if (RB_INTEGER_TYPE_P(str2)) {
  if (rb_num_to_uint(str2, &code) == 0) {
  }
  else if (FIXNUM_P(str2)) {
      rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
  }
  else {
      rb_raise(rb_eRangeError, "bignum out of char range");
  }
    }
    else {
  return rb_str_append(str1, str2);
    }

    encidx = rb_enc_to_index(enc);
    if (encidx == ENCINDEX_ASCII || encidx == ENCINDEX_US_ASCII) {
  /* US-ASCII automatically extended to ASCII-8BIT */
  char buf[1];
  buf[0] = (char)code;
  if (code > 0xFF) {
      rb_raise(rb_eRangeError, "%u out of char range", code);
  }
  rb_str_cat(str1, buf, 1);
  if (encidx == ENCINDEX_US_ASCII && code > 127) {
      rb_enc_associate_index(str1, ENCINDEX_ASCII);
      ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
  }
    }
    else {
  long pos = RSTRING_LEN(str1);
  int cr = ENC_CODERANGE(str1);
  int len;
  char *buf;

  switch (len = rb_enc_codelen(code, enc)) {
    case ONIGERR_INVALID_CODE_POINT_VALUE:
      rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
      break;
    case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
    case 0:
      rb_raise(rb_eRangeError, "%u out of char range", code);
      break;
  }
  buf = ALLOCA_N(char, len + 1);
  rb_enc_mbcput(code, buf, enc);
  if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
      rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
  }
  rb_str_resize(str1, pos+len);
  memcpy(RSTRING_PTR(str1) + pos, buf, len);
  if (cr == ENC_CODERANGE_7BIT && code > 127)
      cr = ENC_CODERANGE_VALID;
  ENC_CODERANGE_SET(str1, cr);
    }
    return str1;
}

#<=>(other_string) ⇒ -1, ...

Comparison—Returns -1, 0, +1, or nil depending on whether string is less than, equal to, or greater than other_string.

nil is returned if the two values are incomparable.

If the strings are of different lengths, and the strings are equal when compared up to the shortest length, then the longer string is considered greater than the shorter one.

<=> is the basis for the methods <, <=, >, >=, and between?, included from module Comparable. The method String#== does not use Comparable#==.

"abcdef" <=> "abcde"     #=> 1
"abcdef" <=> "abcdef"    #=> 0
"abcdef" <=> "abcdefg"   #=> -1
"abcdef" <=> "ABCDEF"    #=> 1
"abcdef" <=> 1           #=> nil

3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
# File 'string.c', line 3334

static VALUE
rb_str_cmp_m(VALUE str1, VALUE str2)
{
    int result;
    VALUE s = rb_check_string_type(str2);
    if (NIL_P(s)) {
  return rb_invcmp(str1, str2);
    }
    result = rb_str_cmp(str1, s);
    return INT2FIX(result);
}

#==(obj) ⇒ Boolean #===(obj) ⇒ Boolean

Equality—Returns whether str == obj, similar to Object#==.

If obj is not an instance of String but responds to to_str, then the two strings are compared using obj.==.

Otherwise, returns similarly to String#eql?, comparing length and content.


3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
# File 'string.c', line 3281

VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) return Qtrue;
    if (!RB_TYPE_P(str2, T_STRING)) {
  if (!rb_respond_to(str2, idTo_str)) {
      return Qfalse;
  }
  return rb_equal(str2, str1);
    }
    return rb_str_eql_internal(str1, str2);
}

#==(obj) ⇒ Boolean #===(obj) ⇒ Boolean

Equality—Returns whether str == obj, similar to Object#==.

If obj is not an instance of String but responds to to_str, then the two strings are compared using obj.==.

Otherwise, returns similarly to String#eql?, comparing length and content.


3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
# File 'string.c', line 3281

VALUE
rb_str_equal(VALUE str1, VALUE str2)
{
    if (str1 == str2) return Qtrue;
    if (!RB_TYPE_P(str2, T_STRING)) {
  if (!rb_respond_to(str2, idTo_str)) {
      return Qfalse;
  }
  return rb_equal(str2, str1);
    }
    return rb_str_eql_internal(str1, str2);
}

#=~(obj) ⇒ Integer?

Match—If obj is a Regexp, use it as a pattern to match against str,and returns the position the match starts, or nil if there is no match. Otherwise, invokes obj.=~, passing str as an argument. The default =~ in Object returns nil.

Note: str =~ regexp is not the same as regexp =~ str. Strings captured from named capture groups are assigned to local variables only in the second case.

"cat o' 9 tails" =~ /\d/   #=> 7
"cat o' 9 tails" =~ 9      #=> nil

3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
# File 'string.c', line 3802

static VALUE
rb_str_match(VALUE x, VALUE y)
{
    if (SPECIAL_CONST_P(y)) goto generic;
    switch (BUILTIN_TYPE(y)) {
      case T_STRING:
  rb_raise(rb_eTypeError, "type mismatch: String given");

      case T_REGEXP:
  return rb_reg_match(y, x);

      generic:
      default:
  return rb_funcall(y, idEqTilde, 1, x);
    }
}

#[](index) ⇒ String? #[](start, length) ⇒ String? #[](range) ⇒ String? #[](regexp) ⇒ String? #[](regexp, capture) ⇒ String? #[](match_str) ⇒ String? #slice(index) ⇒ String? #slice(start, length) ⇒ String? #slice(range) ⇒ String? #slice(regexp) ⇒ String? #slice(regexp, capture) ⇒ String? #slice(match_str) ⇒ String?

Element Reference — If passed a single index, returns a substring of one character at that index. If passed a start index and a length, returns a substring containing length characters starting at the start index. If passed a range, its beginning and end are interpreted as offsets delimiting the substring to be returned.

In these three cases, if an index is negative, it is counted from the end of the string. For the start and range cases the starting index is just before a character and an index matching the string's size. Additionally, an empty string is returned when the starting index for a character range is at the end of the string.

Returns nil if the initial index falls outside the string or the length is negative.

If a Regexp is supplied, the matching portion of the string is returned. If a capture follows the regular expression, which may be a capture group index or name, follows the regular expression that component of the MatchData is returned instead.

If a match_str is given, that string is returned if it occurs in the string.

Returns nil if the regular expression does not match or the match string cannot be found.

a = "hello there"

a[1]                   #=> "e"
a[2, 3]                #=> "llo"
a[2..3]                #=> "ll"

a[-3, 2]               #=> "er"
a[7..-2]               #=> "her"
a[-4..-2]              #=> "her"
a[-2..-4]              #=> ""

a[11, 0]               #=> ""
a[11]                  #=> nil
a[12, 0]               #=> nil
a[12..-1]              #=> nil

a[/[aeiou](.)\1/]      #=> "ell"
a[/[aeiou](.)\1/, 0]   #=> "ell"
a[/[aeiou](.)\1/, 1]   #=> "l"
a[/[aeiou](.)\1/, 2]   #=> nil

a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "non_vowel"] #=> "l"
a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "vowel"]     #=> "e"

a["lo"]                #=> "lo"
a["bye"]               #=> nil

4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
# File 'string.c', line 4570

static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
  if (RB_TYPE_P(argv[0], T_REGEXP)) {
      return rb_str_subpat(str, argv[0], argv[1]);
  }
  else {
      long beg = NUM2LONG(argv[0]);
      long len = NUM2LONG(argv[1]);
      return rb_str_substr(str, beg, len);
  }
    }
    rb_check_arity(argc, 1, 2);
    return rb_str_aref(str, argv[0]);
}

#[]=(integer) ⇒ Object #[]=(integer, integer) ⇒ Object #[]=(range) ⇒ Object #[]=(regexp) ⇒ Object #[]=(regexp, integer) ⇒ Object #[]=(regexp, name) ⇒ Object #[]=(other_str) ⇒ Object

Element Assignment—Replaces some or all of the content of str. The portion of the string affected is determined using the same criteria as String#[]. If the replacement string is not the same length as the text it is replacing, the string will be adjusted accordingly. If the regular expression or string is used as the index doesn't match a position in the string, IndexError is raised. If the regular expression form is used, the optional second Integer allows you to specify which portion of the match to replace (effectively using the MatchData indexing rules. The forms that take an Integer will raise an IndexError if the value is out of range; the Range form will raise a RangeError, and the Regexp and String will raise an IndexError on negative match.


4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
# File 'string.c', line 4808

static VALUE
rb_str_aset_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 3) {
  if (RB_TYPE_P(argv[0], T_REGEXP)) {
      rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
  }
  else {
      rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
  }
  return argv[2];
    }
    rb_check_arity(argc, 2, 3);
    return rb_str_aset(str, argv[0], argv[1]);
}

#ascii_only?Boolean

Returns true for a string which has only ASCII characters.

"abc".force_encoding("UTF-8").ascii_only?          #=> true
"abc\u{6666}".force_encoding("UTF-8").ascii_only?  #=> false

10177
10178
10179
10180
10181
10182
10183
# File 'string.c', line 10177

static VALUE
rb_str_is_ascii_only_p(VALUE str)
{
    int cr = rb_enc_str_coderange(str);

    return cr == ENC_CODERANGE_7BIT ? Qtrue : Qfalse;
}

#bString

Returns a copied string whose encoding is ASCII-8BIT.


10139
10140
10141
10142
10143
10144
10145
10146
# File 'string.c', line 10139

static VALUE
rb_str_b(VALUE str)
{
    VALUE str2 = str_alloc(rb_cString);
    str_replace_shared_without_enc(str2, str);
    ENC_CODERANGE_CLEAR(str2);
    return str2;
}

#bytesArray

Returns an array of bytes in str. This is a shorthand for str.each_byte.to_a.

If a block is given, which is a deprecated form, works the same as each_byte.


8438
8439
8440
8441
8442
8443
# File 'string.c', line 8438

static VALUE
rb_str_bytes(VALUE str)
{
    VALUE ary = WANTARRAY("bytes", RSTRING_LEN(str));
    return rb_str_enumerate_bytes(str, ary);
}

#bytesizeInteger

Returns the length of str in bytes.

"\x80\u3042".bytesize  #=> 4
"hello".bytesize       #=> 5

1873
1874
1875
1876
1877
# File 'string.c', line 1873

static VALUE
rb_str_bytesize(VALUE str)
{
    return LONG2NUM(RSTRING_LEN(str));
}

#byteslice(integer) ⇒ String? #byteslice(integer, integer) ⇒ String? #byteslice(range) ⇒ String?

Byte Reference—If passed a single Integer, returns a substring of one byte at that position. If passed two Integer objects, returns a substring starting at the offset given by the first, and a length given by the second. If given a Range, a substring containing bytes at offsets given by the range is returned. In all three cases, if an offset is negative, it is counted from the end of str. Returns nil if the initial offset falls outside the string, the length is negative, or the beginning of the range is greater than the end. The encoding of the resulted string keeps original encoding.

"hello".byteslice(1)     #=> "e"
"hello".byteslice(-1)    #=> "o"
"hello".byteslice(1, 2)  #=> "el"
"\x80\u3042".byteslice(1, 3) #=> "\u3042"
"\x03\u3042\xff".byteslice(1..3) #=> "\u3042"

5611
5612
5613
5614
5615
5616
5617
5618
5619
5620
5621
# File 'string.c', line 5611

static VALUE
rb_str_byteslice(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
  long beg = NUM2LONG(argv[0]);
  long end = NUM2LONG(argv[1]);
  return str_byte_substr(str, beg, end, TRUE);
    }
    rb_check_arity(argc, 1, 2);
    return str_byte_aref(str, argv[0]);
}

#capitalizeString #capitalize([options]) ⇒ String

Returns a copy of str with the first character converted to uppercase and the remainder to lowercase.

See String#downcase for meaning of options and use with different encodings.

"hello".capitalize    #=> "Hello"
"HELLO".capitalize    #=> "Hello"
"123ABC".capitalize   #=> "123abc"

6916
6917
6918
6919
6920
6921
6922
6923
6924
6925
6926
6927
6928
6929
6930
6931
6932
6933
6934
# File 'string.c', line 6916

static VALUE
rb_str_capitalize(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return str;
    if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new_with_class(str, 0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }
    return ret;
}

#capitalize!String? #capitalize!([options]) ⇒ String?

Modifies str by converting the first character to uppercase and the remainder to lowercase. Returns nil if no changes are made. There is an exception for modern Georgian (mkhedruli/MTAVRULI), where the result is the same as for String#downcase, to avoid mixed case.

See String#downcase for meaning of options and use with different encodings.

a = "hello"
a.capitalize!   #=> "Hello"
a               #=> "Hello"
a.capitalize!   #=> nil

6881
6882
6883
6884
6885
6886
6887
6888
6889
6890
6891
6892
6893
6894
6895
6896
6897
6898
# File 'string.c', line 6881

static VALUE
rb_str_capitalize_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
    if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
  str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#casecmp(other_str) ⇒ -1, ...

Case-insensitive version of String#<=>. Currently, case-insensitivity only works on characters A-Z/a-z, not all of Unicode. This is different from String#casecmp?.

"aBcDeF".casecmp("abcde")     #=> 1
"aBcDeF".casecmp("abcdef")    #=> 0
"aBcDeF".casecmp("abcdefg")   #=> -1
"abcdef".casecmp("ABCDEF")    #=> 0

nil is returned if the two strings have incompatible encodings, or if other_str is not a string.

"foo".casecmp(2)   #=> nil
"\u{e4 f6 fc}".encode("ISO-8859-1").casecmp("\u{c4 d6 dc}")   #=> nil

3369
3370
3371
3372
3373
3374
3375
3376
3377
# File 'string.c', line 3369

static VALUE
rb_str_casecmp(VALUE str1, VALUE str2)
{
    VALUE s = rb_check_string_type(str2);
    if (NIL_P(s)) {
  return Qnil;
    }
    return str_casecmp(str1, s);
}

#casecmp?(other_str) ⇒ true, ...

Returns true if str and other_str are equal after Unicode case folding, false if they are not equal.

"aBcDeF".casecmp?("abcde")     #=> false
"aBcDeF".casecmp?("abcdef")    #=> true
"aBcDeF".casecmp?("abcdefg")   #=> false
"abcdef".casecmp?("ABCDEF")    #=> true
"\u{e4 f6 fc}".casecmp?("\u{c4 d6 dc}")   #=> true

nil is returned if the two strings have incompatible encodings, or if other_str is not a string.

"foo".casecmp?(2)   #=> nil
"\u{e4 f6 fc}".encode("ISO-8859-1").casecmp?("\u{c4 d6 dc}")   #=> nil

3456
3457
3458
3459
3460
3461
3462
3463
3464
# File 'string.c', line 3456

static VALUE
rb_str_casecmp_p(VALUE str1, VALUE str2)
{
    VALUE s = rb_check_string_type(str2);
    if (NIL_P(s)) {
  return Qnil;
    }
    return str_casecmp_p(str1, s);
}

#center(width, padstr = ' ') ⇒ String

Centers str in width. If width is greater than the length of str, returns a new String of length width with str centered and padded with padstr; otherwise, returns str.

"hello".center(4)         #=> "hello"
"hello".center(20)        #=> "       hello        "
"hello".center(20, '123') #=> "1231231hello12312312"

9759
9760
9761
9762
9763
# File 'string.c', line 9759

static VALUE
rb_str_center(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'c');
}

#charsArray

Returns an array of characters in str. This is a shorthand for str.each_char.to_a.

If a block is given, which is a deprecated form, works the same as each_char.


8516
8517
8518
8519
8520
8521
# File 'string.c', line 8516

static VALUE
rb_str_chars(VALUE str)
{
    VALUE ary = WANTARRAY("chars", rb_str_strlen(str));
    return rb_str_enumerate_chars(str, ary);
}

#chomp(separator = $/) ⇒ String

Returns a new String with the given record separator removed from the end of str (if present). If $/ has not been changed from the default Ruby record separator, then chomp also removes carriage return characters (that is it will remove \n, \r, and \r\n). If $/ is an empty string, it will remove all trailing newlines from the string.

"hello".chomp                #=> "hello"
"hello\n".chomp              #=> "hello"
"hello\r\n".chomp            #=> "hello"
"hello\n\r".chomp            #=> "hello\n"
"hello\r".chomp              #=> "hello"
"hello \n there".chomp       #=> "hello \n there"
"hello".chomp("llo")         #=> "he"
"hello\r\n\r\n".chomp('')    #=> "hello"
"hello\r\n\r\r\n".chomp('')  #=> "hello\r\n\r"

8993
8994
8995
8996
8997
8998
8999
# File 'string.c', line 8993

static VALUE
rb_str_chomp(int argc, VALUE *argv, VALUE str)
{
    VALUE rs = chomp_rs(argc, argv);
    if (NIL_P(rs)) return rb_str_dup(str);
    return rb_str_subseq(str, 0, chompped_length(str, rs));
}

#chomp!(separator = $/) ⇒ String?

Modifies str in place as described for String#chomp, returning str, or nil if no modifications were made.


8959
8960
8961
8962
8963
8964
8965
8966
8967
8968
# File 'string.c', line 8959

static VALUE
rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE rs;
    str_modifiable(str);
    if (RSTRING_LEN(str) == 0) return Qnil;
    rs = chomp_rs(argc, argv);
    if (NIL_P(rs)) return Qnil;
    return rb_str_chomp_string(str, rs);
}

#chopString

Returns a new String with the last character removed. If the string ends with \r\n, both characters are removed. Applying chop to an empty string returns an empty string. String#chomp is often a safer alternative, as it leaves the string unchanged if it doesn't end in a record separator.

"string\r\n".chop   #=> "string"
"string\n\r".chop   #=> "string\n"
"string\n".chop     #=> "string"
"string".chop       #=> "strin"
"x".chop.chop       #=> ""

8810
8811
8812
8813
8814
# File 'string.c', line 8810

static VALUE
rb_str_chop(VALUE str)
{
    return rb_str_subseq(str, 0, chopped_length(str));
}

#chop!String?

Processes str as for String#chop, returning str, or nil if str is the empty string. See also String#chomp!.


8774
8775
8776
8777
8778
8779
8780
8781
8782
8783
8784
8785
8786
8787
8788
8789
# File 'string.c', line 8774

static VALUE
rb_str_chop_bang(VALUE str)
{
    str_modify_keep_cr(str);
    if (RSTRING_LEN(str) > 0) {
  long len;
  len = chopped_length(str);
  STR_SET_LEN(str, len);
  TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
      ENC_CODERANGE_CLEAR(str);
  }
  return str;
    }
    return Qnil;
}

#chrString

Returns a one-character string at the beginning of the string.

a = "abcde"
a.chr    #=> "a"

5422
5423
5424
5425
5426
# File 'string.c', line 5422

static VALUE
rb_str_chr(VALUE str)
{
    return rb_str_substr(str, 0, 1);
}

#clearString

Makes string empty.

a = "abcde"
a.clear    #=> ""

5398
5399
5400
5401
5402
5403
5404
5405
5406
5407
5408
5409
5410
# File 'string.c', line 5398

static VALUE
rb_str_clear(VALUE str)
{
    str_discard(str);
    STR_SET_EMBED(str);
    STR_SET_EMBED_LEN(str, 0);
    RSTRING_PTR(str)[0] = 0;
    if (rb_enc_asciicompat(STR_ENC_GET(str)))
  ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
    else
  ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
    return str;
}

#codepointsArray

Returns an array of the Integer ordinals of the characters in str. This is a shorthand for str.each_codepoint.to_a.

If a block is given, which is a deprecated form, works the same as each_codepoint.


8591
8592
8593
8594
8595
8596
# File 'string.c', line 8591

static VALUE
rb_str_codepoints(VALUE str)
{
    VALUE ary = WANTARRAY("codepoints", rb_str_strlen(str));
    return rb_str_enumerate_codepoints(str, ary);
}

#concat(obj1, obj2, ...) ⇒ String

Concatenates the given object(s) to str. If an object is an Integer, it is considered a codepoint and converted to a character before concatenation.

concat can take multiple arguments, and all the arguments are concatenated in order.

a = "hello "
a.concat("world", 33)      #=> "hello world!"
a                          #=> "hello world!"

b = "sn"
b.concat("_", b, "_", b)   #=> "sn_sn_sn"

See also String#<<, which takes a single argument.


3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
# File 'string.c', line 3043

static VALUE
rb_str_concat_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc == 1) {
  return rb_str_concat(str, argv[0]);
    }
    else if (argc > 1) {
  int i;
  VALUE arg_str = rb_str_tmp_new(0);
  rb_enc_copy(arg_str, str);
  for (i = 0; i < argc; i++) {
      rb_str_concat(arg_str, argv[i]);
  }
  rb_str_buf_append(str, arg_str);
    }

    return str;
}

#count([other_str]) ⇒ Integer

Each other_str parameter defines a set of characters to count. The intersection of these sets defines the characters to count in str. Any other_str that starts with a caret ^ is negated. The sequence c1-c2 means all characters between c1 and c2. The backslash character \ can be used to escape ^ or - and is otherwise ignored unless it appears at the end of a sequence or the end of a other_str.

a = "hello world"
a.count "lo"                   #=> 5
a.count "lo", "o"              #=> 2
a.count "hello", "^l"          #=> 4
a.count "ej-m"                 #=> 4

"hello^world".count "\\^aeiou" #=> 4
"hello-world".count "a\\-eo"   #=> 4

c = "hello world\\r\\n"
c.count "\\"                   #=> 2
c.count "\\A"                  #=> 0
c.count "X-\\w"                #=> 3

7724
7725
7726
7727
7728
7729
7730
7731
7732
7733
7734
7735
7736
7737
7738
7739
7740
7741
7742
7743
7744
7745
7746
7747
7748
7749
7750
7751
7752
7753
7754
7755
7756
7757
7758
7759
7760
7761
7762
7763
7764
7765
7766
7767
7768
7769
7770
7771
7772
7773
7774
7775
7776
7777
7778
7779
7780
7781
7782
7783
7784
7785
7786
7787
7788
7789
7790
7791
7792
# File 'string.c', line 7724

static VALUE
rb_str_count(int argc, VALUE *argv, VALUE str)
{
    char table[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    VALUE del = 0, nodel = 0, tstr;
    char *s, *send;
    int i;
    int ascompat;

    rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);

    tstr = argv[0];
    StringValue(tstr);
    enc = rb_enc_check(str, tstr);
    if (argc == 1) {
  const char *ptstr;
  if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
      (ptstr = RSTRING_PTR(tstr),
       ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (const unsigned char *)ptstr, (const unsigned char *)ptstr+1)) &&
      !is_broken_string(str)) {
      int n = 0;
      int clen;
      unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);

      s = RSTRING_PTR(str);
      if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
      send = RSTRING_END(str);
      while (s < send) {
    if (*(unsigned char*)s++ == c) n++;
      }
      return INT2NUM(n);
  }
    }

    tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
    for (i=1; i<argc; i++) {
  tstr = argv[i];
  StringValue(tstr);
  enc = rb_enc_check(str, tstr);
  tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
    }

    s = RSTRING_PTR(str);
    if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
    send = RSTRING_END(str);
    ascompat = rb_enc_asciicompat(enc);
    i = 0;
    while (s < send) {
  unsigned int c;

  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
      if (table[c]) {
    i++;
      }
      s++;
  }
  else {
      int clen;
      c = rb_enc_codepoint_len(s, send, &clen, enc);
      if (tr_find(c, table, del, nodel)) {
    i++;
      }
      s += clen;
  }
    }

    return INT2NUM(i);
}

#crypt(salt_str) ⇒ String

Returns the string generated by calling crypt(3) standard library function with str and salt_str, in this order, as its arguments. Please do not use this method any longer. It is legacy; provided only for backward compatibility with ruby scripts in earlier days. It is bad to use in contemporary programs for several reasons:

  • Behaviour of C's crypt(3) depends on the OS it is run. The generated string lacks data portability.

  • On some OSes such as Mac OS, crypt(3) never fails (i.e. silently ends up in unexpected results).

  • On some OSes such as Mac OS, crypt(3) is not thread safe.

  • So-called “traditional” usage of crypt(3) is very very very weak. According to its manpage, Linux's traditional crypt(3) output has only 2**56 variations; too easy to brute force today. And this is the default behaviour.

  • In order to make things robust some OSes implement so-called “modular” usage. To go through, you have to do a complex build-up of the salt_str parameter, by hand. Failure in generation of a proper salt string tends not to yield any errors; typos in parameters are normally not detectable.

    • For instance, in the following example, the second invocation of String#crypt is wrong; it has a typo in “round=” (lacks “s”). However the call does not fail and something unexpected is generated.

      "foo".crypt("$5$rounds=1000$salt$") # OK, proper usage
      "foo".crypt("$5$round=1000$salt$")  # Typo not detected
      
  • Even in the “modular” mode, some hash functions are considered archaic and no longer recommended at all; for instance module $1$ is officially abandoned by its author: see phk.freebsd.dk/sagas/md5crypt_eol.html . For another instance module $3$ is considered completely broken: see the manpage of FreeBSD.

  • On some OS such as Mac OS, there is no modular mode. Yet, as written above, crypt(3) on Mac OS never fails. This means even if you build up a proper salt string it generates a traditional DES hash anyways, and there is no way for you to be aware of.

    "foo".crypt("$5$rounds=1000$salt$") # => "$5fNPQMxC5j6."
    

If for some reason you cannot migrate to other secure contemporary password hashing algorithms, install the string-crypt gem and require 'string/crypt' to continue using it.


9475
9476
9477
9478
9479
9480
9481
9482
9483
9484
9485
9486
9487
9488
9489
9490
9491
9492
9493
9494
9495
9496
9497
9498
9499
9500
9501
9502
9503
9504
9505
9506
9507
9508
9509
9510
9511
9512
9513
9514
9515
9516
9517
9518
9519
9520
9521
9522
9523
9524
9525
9526
9527
9528
9529
# File 'string.c', line 9475

static VALUE
rb_str_crypt(VALUE str, VALUE salt)
{
#ifdef HAVE_CRYPT_R
    VALUE databuf;
    struct crypt_data *data;
#   define CRYPT_END() ALLOCV_END(databuf)
#else
    extern char *crypt(const char *, const char *);
#   define CRYPT_END() (void)0
#endif
    VALUE result;
    const char *s, *saltp;
    char *res;
#ifdef BROKEN_CRYPT
    char salt_8bit_clean[3];
#endif

    StringValue(salt);
    mustnot_wchar(str);
    mustnot_wchar(salt);
    if (RSTRING_LEN(salt) < 2) {
      short_salt:
  rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
    }

    s = StringValueCStr(str);
    saltp = RSTRING_PTR(salt);
    if (!saltp[0] || !saltp[1]) goto short_salt;
#ifdef BROKEN_CRYPT
    if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
  salt_8bit_clean[0] = saltp[0] & 0x7f;
  salt_8bit_clean[1] = saltp[1] & 0x7f;
  salt_8bit_clean[2] = '\0';
  saltp = salt_8bit_clean;
    }
#endif
#ifdef HAVE_CRYPT_R
    data = ALLOCV(databuf, sizeof(struct crypt_data));
# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
    data->initialized = 0;
# endif
    res = crypt_r(s, saltp, data);
#else
    res = crypt(s, saltp);
#endif
    if (!res) {
  int err = errno;
  CRYPT_END();
  rb_syserr_fail(err, "crypt");
    }
    result = rb_str_new_cstr(res);
    CRYPT_END();
    return result;
}

#delete([other_str]) ⇒ String

Returns a copy of str with all characters in the intersection of its arguments deleted. Uses the same rules for building the set of characters as String#count.

"hello".delete "l","lo"        #=> "heo"
"hello".delete "lo"            #=> "he"
"hello".delete "aeiou", "^e"   #=> "hell"
"hello".delete "ej-m"          #=> "ho"

7543
7544
7545
7546
7547
7548
7549
# File 'string.c', line 7543

static VALUE
rb_str_delete(int argc, VALUE *argv, VALUE str)
{
    str = rb_str_dup(str);
    rb_str_delete_bang(argc, argv, str);
    return str;
}

#delete!([other_str]) ⇒ String?

Performs a delete operation in place, returning str, or nil if str was not modified.


7467
7468
7469
7470
7471
7472
7473
7474
7475
7476
7477
7478
7479
7480
7481
7482
7483
7484
7485
7486
7487
7488
7489
7490
7491
7492
7493
7494
7495
7496
7497
7498
7499
7500
7501
7502
7503
7504
7505
7506
7507
7508
7509
7510
7511
7512
7513
7514
7515
7516
7517
7518
7519
7520
7521
7522
7523
7524
7525
7526
# File 'string.c', line 7467

static VALUE
rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
{
    char squeez[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    char *s, *send, *t;
    VALUE del = 0, nodel = 0;
    int modify = 0;
    int i, ascompat, cr;

    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
    rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
    for (i=0; i<argc; i++) {
  VALUE s = argv[i];

  StringValue(s);
  enc = rb_enc_check(str, s);
  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
    }

    str_modify_keep_cr(str);
    ascompat = rb_enc_asciicompat(enc);
    s = t = RSTRING_PTR(str);
    send = RSTRING_END(str);
    cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
    while (s < send) {
  unsigned int c;
  int clen;

  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
      if (squeez[c]) {
    modify = 1;
      }
      else {
    if (t != s) *t = c;
    t++;
      }
      s++;
  }
  else {
      c = rb_enc_codepoint_len(s, send, &clen, enc);

      if (tr_find(c, squeez, del, nodel)) {
    modify = 1;
      }
      else {
    if (t != s) rb_enc_mbcput(c, t, enc);
    t += clen;
    if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
      }
      s += clen;
  }
    }
    TERM_FILL(t, TERM_LEN(str));
    STR_SET_LEN(str, t - RSTRING_PTR(str));
    ENC_CODERANGE_SET(str, cr);

    if (modify) return str;
    return Qnil;
}

#delete_prefix(prefix) ⇒ String

Returns a copy of str with leading prefix deleted.

"hello".delete_prefix("hel") #=> "lo"
"hello".delete_prefix("llo") #=> "hello"

9994
9995
9996
9997
9998
9999
10000
10001
10002
10003
# File 'string.c', line 9994

static VALUE
rb_str_delete_prefix(VALUE str, VALUE prefix)
{
    long prefixlen;

    prefixlen = deleted_prefix_length(str, prefix);
    if (prefixlen <= 0) return rb_str_dup(str);

    return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
}

#delete_prefix!(prefix) ⇒ self?

Deletes leading prefix from str, returning nil if no change was made.

"hello".delete_prefix!("hel") #=> "lo"
"hello".delete_prefix!("llo") #=> nil

9972
9973
9974
9975
9976
9977
9978
9979
9980
9981
9982
# File 'string.c', line 9972

static VALUE
rb_str_delete_prefix_bang(VALUE str, VALUE prefix)
{
    long prefixlen;
    str_modify_keep_cr(str);

    prefixlen = deleted_prefix_length(str, prefix);
    if (prefixlen <= 0) return Qnil;

    return rb_str_drop_bytes(str, prefixlen);
}

#delete_suffix(suffix) ⇒ String

Returns a copy of str with trailing suffix deleted.

"hello".delete_suffix("llo") #=> "he"
"hello".delete_suffix("hel") #=> "hello"

10080
10081
10082
10083
10084
10085
10086
10087
10088
10089
# File 'string.c', line 10080

static VALUE
rb_str_delete_suffix(VALUE str, VALUE suffix)
{
    long suffixlen;

    suffixlen = deleted_suffix_length(str, suffix);
    if (suffixlen <= 0) return rb_str_dup(str);

    return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
}

#delete_suffix!(suffix) ⇒ self?

Deletes trailing suffix from str, returning nil if no change was made.

"hello".delete_suffix!("llo") #=> "he"
"hello".delete_suffix!("hel") #=> nil

10050
10051
10052
10053
10054
10055
10056
10057
10058
10059
10060
10061
10062
10063
10064
10065
10066
10067
10068
# File 'string.c', line 10050

static VALUE
rb_str_delete_suffix_bang(VALUE str, VALUE suffix)
{
    long olen, suffixlen, len;
    str_modifiable(str);

    suffixlen = deleted_suffix_length(str, suffix);
    if (suffixlen <= 0) return Qnil;

    olen = RSTRING_LEN(str);
    str_modify_keep_cr(str);
    len = olen - suffixlen;
    STR_SET_LEN(str, len);
    TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
    if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
  ENC_CODERANGE_CLEAR(str);
    }
    return str;
}

#downcaseString #downcase([options]) ⇒ String

Returns a copy of str with all uppercase letters replaced with their lowercase counterparts. Which letters exactly are replaced, and by which other letters, depends on the presence or absence of options, and on the encoding of the string.

The meaning of the options is as follows:

No option

Full Unicode case mapping, suitable for most languages (see :turkic and :lithuanian options below for exceptions). Context-dependent case mapping as described in Table 3-14 of the Unicode standard is currently not supported.

:ascii

Only the ASCII region, i.e. the characters “A'' to “Z'' and “a'' to “z'', are affected. This option cannot be combined with any other option.

:turkic

Full Unicode case mapping, adapted for Turkic languages (Turkish, Azerbaijani, …). This means that upper case I is mapped to lower case dotless i, and so on.

:lithuanian

Currently, just full Unicode case mapping. In the future, full Unicode case mapping adapted for Lithuanian (keeping the dot on the lower case i even if there is an accent on top).

:fold

Only available on downcase and downcase!. Unicode case folding, which is more far-reaching than Unicode case mapping. This option currently cannot be combined with any other option (i.e. there is currently no variant for turkic languages).

Please note that several assumptions that are valid for ASCII-only case conversions do not hold for more general case conversions. For example, the length of the result may not be the same as the length of the input (neither in characters nor in bytes), some roundtrip assumptions (e.g. str.downcase == str.upcase.downcase) may not apply, and Unicode normalization (i.e. String#unicode_normalize) is not necessarily maintained by case mapping operations.

Non-ASCII case mapping/folding is currently supported for UTF-8, UTF-16BE/LE, UTF-32BE/LE, and ISO-8859-1~16 Strings/Symbols. This support will be extended to other encodings.

"hEllO".downcase   #=> "hello"

6837
6838
6839
6840
6841
6842
6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
6853
6854
6855
6856
6857
6858
6859
6860
# File 'string.c', line 6837

static VALUE
rb_str_downcase(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        ret = rb_str_new_with_class(str, RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy(ret, str);
        downcase_single(ret);
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new_with_class(str, 0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }

    return ret;
}

#downcase!String? #downcase!([options]) ⇒ String?

Downcases the contents of str, returning nil if no changes were made.

See String#downcase for meaning of options and use with different encodings.


6764
6765
6766
6767
6768
6769
6770
6771
6772
6773
6774
6775
6776
6777
6778
6779
6780
6781
6782
6783
6784
# File 'string.c', line 6764

static VALUE
rb_str_downcase_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        if (downcase_single(str))
            flags |= ONIGENC_CASE_MODIFIED;
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
  str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#dumpString

Returns a quoted version of the string with all non-printing characters replaced by \xHH notation and all special characters escaped.

This method can be used for round-trip: if the resulting new_str is eval'ed, it will produce the original string.

"hello \n ''".dump     #=> "\"hello \\n ''\""
"\f\x00\xff\\\"".dump  #=> "\"\\f\\x00\\xFF\\\\\\\"\""

See also String#undump.


6056
6057
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101
6102
6103
6104
6105
6106
6107
6108
6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131
6132
6133
6134
6135
6136
6137
6138
6139
6140
6141
6142
6143
6144
6145
6146
6147
6148
6149
6150
6151
6152
6153
6154
6155
6156
6157
6158
6159
6160
6161
6162
6163
6164
6165
6166
6167
6168
6169
6170
6171
6172
6173
6174
6175
6176
6177
6178
6179
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
6195
6196
6197
6198
6199
6200
6201
# File 'string.c', line 6056

VALUE
rb_str_dump(VALUE str)
{
    int encidx = rb_enc_get_index(str);
    rb_encoding *enc = rb_enc_from_index(encidx);
    long len;
    const char *p, *pend;
    char *q, *qend;
    VALUE result;
    int u8 = (encidx == rb_utf8_encindex());
    static const char nonascii_suffix[] = ".dup.force_encoding(\"%s\")";

    len = 2;      /* "" */
    if (!rb_enc_asciicompat(enc)) {
  len += strlen(nonascii_suffix) - rb_strlen_lit("%s");
  len += strlen(enc->name);
    }

    p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
    while (p < pend) {
  int clen;
  unsigned char c = *p++;

  switch (c) {
    case '"':  case '\\':
    case '\n': case '\r':
    case '\t': case '\f':
    case '\013': case '\010': case '\007': case '\033':
      clen = 2;
      break;

    case '#':
      clen = IS_EVSTR(p, pend) ? 2 : 1;
      break;

    default:
      if (ISPRINT(c)) {
    clen = 1;
      }
      else {
    if (u8 && c > 0x7F) { /* \u notation */
        int n = rb_enc_precise_mbclen(p-1, pend, enc);
        if (MBCLEN_CHARFOUND_P(n)) {
      unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
      if (cc <= 0xFFFF)
          clen = 6;  /* \uXXXX */
      else if (cc <= 0xFFFFF)
          clen = 9;  /* \u{XXXXX} */
      else
          clen = 10; /* \u{XXXXXX} */
      p += MBCLEN_CHARFOUND_LEN(n)-1;
      break;
        }
    }
    clen = 4; /* \xNN */
      }
      break;
  }

  if (clen > LONG_MAX - len) {
      rb_raise(rb_eRuntimeError, "string size too big");
  }
  len += clen;
    }

    result = rb_str_new_with_class(str, 0, len);
    p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
    q = RSTRING_PTR(result); qend = q + len + 1;

    *q++ = '"';
    while (p < pend) {
  unsigned char c = *p++;

  if (c == '"' || c == '\\') {
      *q++ = '\\';
      *q++ = c;
  }
  else if (c == '#') {
      if (IS_EVSTR(p, pend)) *q++ = '\\';
      *q++ = '#';
  }
  else if (c == '\n') {
      *q++ = '\\';
      *q++ = 'n';
  }
  else if (c == '\r') {
      *q++ = '\\';
      *q++ = 'r';
  }
  else if (c == '\t') {
      *q++ = '\\';
      *q++ = 't';
  }
  else if (c == '\f') {
      *q++ = '\\';
      *q++ = 'f';
  }
  else if (c == '\013') {
      *q++ = '\\';
      *q++ = 'v';
  }
  else if (c == '\010') {
      *q++ = '\\';
      *q++ = 'b';
  }
  else if (c == '\007') {
      *q++ = '\\';
      *q++ = 'a';
  }
  else if (c == '\033') {
      *q++ = '\\';
      *q++ = 'e';
  }
  else if (ISPRINT(c)) {
      *q++ = c;
  }
  else {
      *q++ = '\\';
      if (u8) {
    int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
    if (MBCLEN_CHARFOUND_P(n)) {
        int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
        p += n;
        if (cc <= 0xFFFF)
      snprintf(q, qend-q, "u%04X", cc);    /* \uXXXX */
        else
      snprintf(q, qend-q, "u{%X}", cc);  /* \u{XXXXX} or \u{XXXXXX} */
        q += strlen(q);
        continue;
    }
      }
      snprintf(q, qend-q, "x%02X", c);
      q += 3;
  }
    }
    *q++ = '"';
    *q = '\0';
    if (!rb_enc_asciicompat(enc)) {
  snprintf(q, qend-q, nonascii_suffix, enc->name);
  encidx = rb_ascii8bit_encindex();
    }
    /* result from dump is ASCII */
    rb_enc_associate_index(result, encidx);
    ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
    return result;
}

#each_byte {|integer| ... } ⇒ String #each_byteObject

Passes each byte in str to the given block, or returns an enumerator if no block is given.

"hello".each_byte {|c| print c, ' ' }

produces:

104 101 108 108 111

Overloads:

  • #each_byte {|integer| ... } ⇒ String

    Yields:

    • (integer)

8420
8421
8422
8423
8424
8425
# File 'string.c', line 8420

static VALUE
rb_str_each_byte(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_byte_size);
    return rb_str_enumerate_bytes(str, 0);
}

#each_char {|cstr| ... } ⇒ String #each_charObject

Passes each character in str to the given block, or returns an enumerator if no block is given.

"hello".each_char {|c| print c, ' ' }

produces:

h e l l o

Overloads:

  • #each_char {|cstr| ... } ⇒ String

    Yields:

    • (cstr)

8498
8499
8500
8501
8502
8503
# File 'string.c', line 8498

static VALUE
rb_str_each_char(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
    return rb_str_enumerate_chars(str, 0);
}

#each_codepoint {|integer| ... } ⇒ String #each_codepointObject

Passes the Integer ordinal of each character in str, also known as a codepoint when applied to Unicode strings to the given block. For encodings other than UTF-8/UTF-16(BE|LE)/UTF-32(BE|LE), values are directly derived from the binary representation of each character.

If no block is given, an enumerator is returned instead.

"hello\u0639".each_codepoint {|c| print c, ' ' }

produces:

104 101 108 108 111 1593

Overloads:

  • #each_codepoint {|integer| ... } ⇒ String

    Yields:

    • (integer)

8572
8573
8574
8575
8576
8577
# File 'string.c', line 8572

static VALUE
rb_str_each_codepoint(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
    return rb_str_enumerate_codepoints(str, 0);
}

#each_grapheme_cluster {|cstr| ... } ⇒ String #each_grapheme_clusterObject

Passes each grapheme cluster in str to the given block, or returns an enumerator if no block is given. Unlike String#each_char, this enumerates by grapheme clusters defined by Unicode Standard Annex #29 unicode.org/reports/tr29/

"a\u0300".each_char.to_a.size #=> 2
"a\u0300".each_grapheme_cluster.to_a.size #=> 1

Overloads:

  • #each_grapheme_cluster {|cstr| ... } ⇒ String

    Yields:

    • (cstr)

8722
8723
8724
8725
8726
8727
# File 'string.c', line 8722

static VALUE
rb_str_each_grapheme_cluster(VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size);
    return rb_str_enumerate_grapheme_clusters(str, 0);
}

#each_line(separator = $/, chomp: false) {|substr| ... } ⇒ String #each_line(separator = $/, chomp: false) ⇒ Object

Splits str using the supplied parameter as the record separator ($/ by default), passing each substring in turn to the supplied block. If a zero-length record separator is supplied, the string is split into paragraphs delimited by multiple successive newlines.

If chomp is true, separator will be removed from the end of each line.

If no block is given, an enumerator is returned instead.

"hello\nworld".each_line {|s| p s}
# prints:
#   "hello\n"
#   "world"

"hello\nworld".each_line('l') {|s| p s}
# prints:
#   "hel"
#   "l"
#   "o\nworl"
#   "d"

"hello\n\n\nworld".each_line('') {|s| p s}
# prints
#   "hello\n\n"
#   "world"

"hello\nworld".each_line(chomp: true) {|s| p s}
# prints:
#   "hello"
#   "world"

"hello\nworld".each_line('l', chomp: true) {|s| p s}
# prints:
#   "he"
#   ""
#   "o\nwor"
#   "d"

Overloads:

  • #each_line(separator = $/, chomp: false) {|substr| ... } ⇒ String

    Yields:

    • (substr)

8352
8353
8354
8355
8356
8357
# File 'string.c', line 8352

static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
{
    RETURN_SIZED_ENUMERATOR(str, argc, argv, 0);
    return rb_str_enumerate_lines(argc, argv, str, 0);
}

#empty?Boolean

Returns true if str has a length of zero.

"hello".empty?   #=> false
" ".empty?       #=> false
"".empty?        #=> true

1890
1891
1892
1893
1894
1895
1896
# File 'string.c', line 1890

static VALUE
rb_str_empty(VALUE str)
{
    if (RSTRING_LEN(str) == 0)
  return Qtrue;
    return Qfalse;
}

#encode(encoding[, options]) ⇒ String #encode(dst_encoding, src_encoding[, options]) ⇒ String #encode([options]) ⇒ String

The first form returns a copy of str transcoded to encoding encoding. The second form returns a copy of str transcoded from src_encoding to dst_encoding. The last form returns a copy of str transcoded to Encoding.default_internal.

By default, the first and second form raise Encoding::UndefinedConversionError for characters that are undefined in the destination encoding, and Encoding::InvalidByteSequenceError for invalid byte sequences in the source encoding. The last form by default does not raise exceptions but uses replacement strings.

The options Hash gives details for conversion and can have the following keys:

:invalid

If the value is :replace, #encode replaces invalid byte sequences in str with the replacement character. The default is to raise the Encoding::InvalidByteSequenceError exception

:undef

If the value is :replace, #encode replaces characters which are undefined in the destination encoding with the replacement character. The default is to raise the Encoding::UndefinedConversionError.

:replace

Sets the replacement string to the given value. The default replacement string is “uFFFD” for Unicode encoding forms, and “?” otherwise.

:fallback

Sets the replacement string by the given object for undefined character. The object should be a Hash, a Proc, a Method, or an object which has [] method. Its key is an undefined character encoded in the source encoding of current transcoder. Its value can be any encoding until it can be converted into the destination encoding of the transcoder.

:xml

The value must be :text or :attr. If the value is :text #encode replaces undefined characters with their (upper-case hexadecimal) numeric character references. '&', '<', and '>' are converted to “&amp;”, “&lt;”, and “&gt;”, respectively. If the value is :attr, #encode also quotes the replacement result (using '“'), and replaces '”' with “&quot;”.

:cr_newline

Replaces LF (“n”) with CR (“r”) if value is true.

:crlf_newline

Replaces LF (“n”) with CRLF (“rn”) if value is true.

:universal_newline

Replaces CRLF (“rn”) and CR (“r”) with LF (“n”) if value is true.


2868
2869
2870
2871
2872
2873
2874
# File 'transcode.c', line 2868

static VALUE
str_encode(int argc, VALUE *argv, VALUE str)
{
    VALUE newstr = str;
    int encidx = str_transcode(argc, argv, &newstr);
    return encoded_dup(newstr, str, encidx);
}

#encode!(encoding[, options]) ⇒ String #encode!(dst_encoding, src_encoding[, options]) ⇒ String

The first form transcodes the contents of str from str.encoding to encoding. The second form transcodes the contents of str from src_encoding to dst_encoding. The options Hash gives details for conversion. See String#encode for details. Returns the string even if no changes were made.


2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
# File 'transcode.c', line 2790

static VALUE
str_encode_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE newstr;
    int encidx;

    rb_check_frozen(str);

    newstr = str;
    encidx = str_transcode(argc, argv, &newstr);

    if (encidx < 0) return str;
    if (newstr == str) {
  rb_enc_associate_index(str, encidx);
  return str;
    }
    rb_str_shared_replace(str, newstr);
    return str_encode_associate(str, encidx);
}

#encodingEncoding

Returns the Encoding object that represents the encoding of obj.


1013
1014
1015
1016
1017
1018
1019
1020
1021
# File 'encoding.c', line 1013

VALUE
rb_obj_encoding(VALUE obj)
{
    int idx = rb_enc_get_index(obj);
    if (idx < 0) {
  rb_raise(rb_eTypeError, "unknown encoding");
    }
    return rb_enc_from_encoding_index(idx & ENC_INDEX_MASK);
}

#end_with?([suffixes]) ⇒ Boolean

Returns true if str ends with one of the suffixes given.

"hello".end_with?("ello")               #=> true

# returns true if one of the +suffixes+ matches.
"hello".end_with?("heaven", "ello")     #=> true
"hello".end_with?("heaven", "paradise") #=> false

9907
9908
9909
9910
9911
9912
9913
9914
9915
9916
9917
9918
9919
9920
9921
9922
9923
9924
9925
9926
9927
9928
# File 'string.c', line 9907

static VALUE
rb_str_end_with(int argc, VALUE *argv, VALUE str)
{
    int i;
    char *p, *s, *e;
    rb_encoding *enc;

    for (i=0; i<argc; i++) {
  VALUE tmp = argv[i];
  StringValue(tmp);
  enc = rb_enc_check(str, tmp);
  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
  p = RSTRING_PTR(str);
        e = p + RSTRING_LEN(str);
  s = e - RSTRING_LEN(tmp);
  if (rb_enc_left_char_head(p, s, e, enc) != s)
      continue;
  if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
      return Qtrue;
    }
    return Qfalse;
}

#eql?(other) ⇒ Boolean

Two strings are equal if they have the same length and content.


3301
3302
3303
3304
3305
3306
3307
# File 'string.c', line 3301

MJIT_FUNC_EXPORTED VALUE
rb_str_eql(VALUE str1, VALUE str2)
{
    if (str1 == str2) return Qtrue;
    if (!RB_TYPE_P(str2, T_STRING)) return Qfalse;
    return rb_str_eql_internal(str1, str2);
}

#force_encoding(encoding) ⇒ String

Changes the encoding to encoding and returns self.


10123
10124
10125
10126
10127
10128
10129
10130
# File 'string.c', line 10123

static VALUE
rb_str_force_encoding(VALUE str, VALUE enc)
{
    str_modifiable(str);
    rb_enc_associate(str, rb_to_encoding(enc));
    ENC_CODERANGE_CLEAR(str);
    return str;
}

#freezeObject


2630
2631
2632
2633
2634
2635
2636
# File 'string.c', line 2630

VALUE
rb_str_freeze(VALUE str)
{
    if (OBJ_FROZEN(str)) return str;
    rb_str_resize(str, RSTRING_LEN(str));
    return rb_obj_freeze(str);
}

#getbyte(index) ⇒ 0 .. 255

returns the indexth byte as an integer.


5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
# File 'string.c', line 5434

static VALUE
rb_str_getbyte(VALUE str, VALUE index)
{
    long pos = NUM2LONG(index);

    if (pos < 0)
        pos += RSTRING_LEN(str);
    if (pos < 0 ||  RSTRING_LEN(str) <= pos)
        return Qnil;

    return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]);
}

#grapheme_clustersArray

Returns an array of grapheme clusters in str. This is a shorthand for str.each_grapheme_cluster.to_a.

If a block is given, which is a deprecated form, works the same as each_grapheme_cluster.


8740
8741
8742
8743
8744
8745
# File 'string.c', line 8740

static VALUE
rb_str_grapheme_clusters(VALUE str)
{
    VALUE ary = WANTARRAY("grapheme_clusters", rb_str_strlen(str));
    return rb_str_enumerate_grapheme_clusters(str, ary);
}

#gsub(pattern, replacement) ⇒ String #gsub(pattern, hash) ⇒ String #gsub(pattern) {|match| ... } ⇒ String #gsub(pattern) ⇒ Object

Returns a copy of str with all occurrences of pattern substituted for the second argument. The pattern is typically a Regexp; if given as a String, any regular expression metacharacters it contains will be interpreted literally, e.g. \d will match a backslash followed by 'd', instead of a digit.

If replacement is a String it will be substituted for the matched text. It may contain back-references to the pattern's capture groups of the form \d, where d is a group number, or \k<n>, where n is a group name. Similarly, \&, \', \`, and + correspond to special variables, $&, $', $`, and $+, respectively. (See regexp.rdoc for details.) \0 is the same as \&. \\ is interpreted as an escape, i.e., a single backslash. Note that, within replacement the special match variables, such as $&, will not refer to the current match.

If the second argument is a Hash, and the matched text is one of its keys, the corresponding value is the replacement string.

In the block form, the current match string is passed in as a parameter, and variables such as $1, $2, $`, $&, and $' will be set appropriately. (See regexp.rdoc for details.) The value returned by the block will be substituted for the match on each call.

When neither a block nor a second argument is supplied, an Enumerator is returned.

"hello".gsub(/[aeiou]/, '*')                  #=> "h*ll*"
"hello".gsub(/([aeiou])/, '<\1>')             #=> "h<e>ll<o>"
"hello".gsub(/./) {|s| s.ord.to_s + ' '}      #=> "104 101 108 108 111 "
"hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}')  #=> "h{e}ll{o}"
'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*')    #=> "h3ll*"

Note that a string literal consumes backslashes. (See syntax/literals.rdoc for details on string literals.) Back-references are typically preceded by an additional backslash. For example, if you want to write a back-reference \& in replacement with a double-quoted string literal, you need to write: "..\\&..". If you want to write a non-back-reference string \& in replacement, you need first to escape the backslash to prevent this method from interpreting it as a back-reference, and then you need to escape the backslashes again to prevent a string literal from consuming them: "..\\\\&..". You may want to use the block form to avoid a lot of backslashes.

Overloads:

  • #gsub(pattern) {|match| ... } ⇒ String

    Yields:


5359
5360
5361
5362
5363
# File 'string.c', line 5359

static VALUE
rb_str_gsub(int argc, VALUE *argv, VALUE str)
{
    return str_gsub(argc, argv, str, 0);
}

#gsub!(pattern, replacement) ⇒ String? #gsub!(pattern, hash) ⇒ String? #gsub!(pattern) {|match| ... } ⇒ String? #gsub!(pattern) ⇒ Object

Performs the substitutions of String#gsub in place, returning str, or nil if no substitutions were performed. If no block and no replacement is given, an enumerator is returned instead.

Overloads:

  • #gsub!(pattern) {|match| ... } ⇒ String?

    Yields:


5291
5292
5293
5294
5295
5296
# File 'string.c', line 5291

static VALUE
rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
{
    str_modify_keep_cr(str);
    return str_gsub(argc, argv, str, 1);
}

#hashInteger

Returns a hash based on the string's length, content and encoding.

See also Object#hash.


3208
3209
3210
3211
3212
3213
# File 'string.c', line 3208

static VALUE
rb_str_hash_m(VALUE str)
{
    st_index_t hval = rb_str_hash(str);
    return ST2FIX(hval);
}

#hexInteger

Treats leading characters from str as a string of hexadecimal digits (with an optional sign and an optional 0x) and returns the corresponding number. Zero is returned on error.

"0x0a".hex     #=> 10
"-1234".hex    #=> -4660
"0".hex        #=> 0
"wombat".hex   #=> 0

9384
9385
9386
9387
9388
# File 'string.c', line 9384

static VALUE
rb_str_hex(VALUE str)
{
    return rb_str_to_inum(str, 16, FALSE);
}

#include?(other_str) ⇒ Boolean

Returns true if str contains the given string or character.

"hello".include? "lo"   #=> true
"hello".include? "ol"   #=> false
"hello".include? ?h     #=> true

5729
5730
5731
5732
5733
5734
5735
5736
5737
5738
5739
# File 'string.c', line 5729

static VALUE
rb_str_include(VALUE str, VALUE arg)
{
    long i;

    StringValue(arg);
    i = rb_str_index(str, arg, 0);

    if (i == -1) return Qfalse;
    return Qtrue;
}

#index(substring[, offset]) ⇒ Integer? #index(regexp[, offset]) ⇒ Integer?

Returns the index of the first occurrence of the given substring or pattern (regexp) in str. Returns nil if not found. If the second parameter is present, it specifies the position in the string to begin the search.

"hello".index('e')             #=> 1
"hello".index('lo')            #=> 3
"hello".index('a')             #=> nil
"hello".index(?e)              #=> 1
"hello".index(/[aeiou]/, -3)   #=> 4

3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
# File 'string.c', line 3562

static VALUE
rb_str_index_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE initpos;
    long pos;

    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
  pos = NUM2LONG(initpos);
    }
    else {
  pos = 0;
    }
    if (pos < 0) {
  pos += str_strlen(str, NULL);
  if (pos < 0) {
      if (RB_TYPE_P(sub, T_REGEXP)) {
    rb_backref_set(Qnil);
      }
      return Qnil;
  }
    }

    if (SPECIAL_CONST_P(sub)) goto generic;
    switch (BUILTIN_TYPE(sub)) {
      case T_REGEXP:
  if (pos > str_strlen(str, NULL))
      return Qnil;
  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
       rb_enc_check(str, sub), single_byte_optimizable(str));

  pos = rb_reg_search(sub, str, pos, 0);
  pos = rb_str_sublen(str, pos);
  break;

      generic:
      default: {
  VALUE tmp;

  tmp = rb_check_string_type(sub);
  if (NIL_P(tmp)) {
      rb_raise(rb_eTypeError, "type mismatch: %s given",
         rb_obj_classname(sub));
  }
  sub = tmp;
      }
  /* fall through */
      case T_STRING:
  pos = rb_str_index(str, sub, pos);
  pos = rb_str_sublen(str, pos);
  break;
    }

    if (pos == -1) return Qnil;
    return LONG2NUM(pos);
}

#replace(other_str) ⇒ String

Replaces the contents of str with the corresponding values in other_str.

s = "hello"         #=> "hello"
s.replace "world"   #=> "world"

5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
# File 'string.c', line 5377

VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);
    if (str == str2) return str;

    StringValue(str2);
    str_discard(str);
    return str_replace(str, str2);
}

#insert(index, other_str) ⇒ String

Inserts other_str before the character at the given index, modifying str. Negative indices count from the end of the string, and insert after the given character. The intent is insert aString so that it starts at the given index.

"abcd".insert(0, 'X')    #=> "Xabcd"
"abcd".insert(3, 'X')    #=> "abcXd"
"abcd".insert(4, 'X')    #=> "abcdX"
"abcd".insert(-3, 'X')   #=> "abXcd"
"abcd".insert(-1, 'X')   #=> "abcdX"

4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
# File 'string.c', line 4841

static VALUE
rb_str_insert(VALUE str, VALUE idx, VALUE str2)
{
    long pos = NUM2LONG(idx);

    if (pos == -1) {
  return rb_str_append(str, str2);
    }
    else if (pos < 0) {
  pos++;
    }
    rb_str_splice(str, pos, 0, str2);
    return str;
}

#inspectString

Returns a printable version of str, surrounded by quote marks, with special characters escaped.

str = "hello"
str[3] = "\b"
str.inspect       #=> "\"hel\\bo\""

5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
5978
5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
6006
6007
6008
6009
6010
6011
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
# File 'string.c', line 5944

VALUE
rb_str_inspect(VALUE str)
{
    int encidx = ENCODING_GET(str);
    rb_encoding *enc = rb_enc_from_index(encidx), *actenc;
    const char *p, *pend, *prev;
    char buf[CHAR_ESC_LEN + 1];
    VALUE result = rb_str_buf_new(0);
    rb_encoding *resenc = rb_default_internal_encoding();
    int unicode_p = rb_enc_unicode_p(enc);
    int asciicompat = rb_enc_asciicompat(enc);

    if (resenc == NULL) resenc = rb_default_external_encoding();
    if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
    rb_enc_associate(result, resenc);
    str_buf_cat2(result, "\"");

    p = RSTRING_PTR(str); pend = RSTRING_END(str);
    prev = p;
    actenc = get_actual_encoding(encidx, str);
    if (actenc != enc) {
  enc = actenc;
  if (unicode_p) unicode_p = rb_enc_unicode_p(enc);
    }
    while (p < pend) {
  unsigned int c, cc;
  int n;

        n = rb_enc_precise_mbclen(p, pend, enc);
        if (!MBCLEN_CHARFOUND_P(n)) {
      if (p > prev) str_buf_cat(result, prev, p - prev);
            n = rb_enc_mbminlen(enc);
            if (pend < p + n)
                n = (int)(pend - p);
            while (n--) {
                snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
                str_buf_cat(result, buf, strlen(buf));
                prev = ++p;
            }
      continue;
  }
        n = MBCLEN_CHARFOUND_LEN(n);
  c = rb_enc_mbc_to_codepoint(p, pend, enc);
  p += n;
  if ((asciicompat || unicode_p) &&
    (c == '"'|| c == '\\' ||
      (c == '#' &&
             p < pend &&
             MBCLEN_CHARFOUND_P(rb_enc_precise_mbclen(p,pend,enc)) &&
             (cc = rb_enc_codepoint(p,pend,enc),
              (cc == '$' || cc == '@' || cc == '{'))))) {
      if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
      str_buf_cat2(result, "\\");
      if (asciicompat || enc == resenc) {
    prev = p - n;
    continue;
      }
  }
  switch (c) {
    case '\n': cc = 'n'; break;
    case '\r': cc = 'r'; break;
    case '\t': cc = 't'; break;
    case '\f': cc = 'f'; break;
    case '\013': cc = 'v'; break;
    case '\010': cc = 'b'; break;
    case '\007': cc = 'a'; break;
    case 033: cc = 'e'; break;
    default: cc = 0; break;
  }
  if (cc) {
      if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
      buf[0] = '\\';
      buf[1] = (char)cc;
      str_buf_cat(result, buf, 2);
      prev = p;
      continue;
  }
  if ((enc == resenc && rb_enc_isprint(c, enc)) ||
      (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
      continue;
  }
  else {
      if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
      rb_str_buf_cat_escaped_char(result, c, unicode_p);
      prev = p;
      continue;
  }
    }
    if (p > prev) str_buf_cat(result, prev, p - prev);
    str_buf_cat2(result, "\"");

    return result;
}

#internObject #to_symObject

Returns the Symbol corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.

"Koala".intern         #=> :Koala
s = 'cat'.to_sym       #=> :cat
s == :cat              #=> true
s = '@cat'.to_sym      #=> :@cat
s == :@cat             #=> true

This can also be used to create symbols that cannot be represented using the :xxx notation.

'cat and dog'.to_sym   #=> :"cat and dog"

718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
# File 'symbol.c', line 718

VALUE
rb_str_intern(VALUE str)
{
#if USE_SYMBOL_GC
    rb_encoding *enc, *ascii;
    int type;
#else
    ID id;
#endif
    VALUE sym = lookup_str_sym(str);

    if (sym) {
  return sym;
    }

#if USE_SYMBOL_GC
    enc = rb_enc_get(str);
    ascii = rb_usascii_encoding();
    if (enc != ascii && sym_check_asciionly(str)) {
  str = rb_str_dup(str);
  rb_enc_associate(str, ascii);
  OBJ_FREEZE(str);
  enc = ascii;
    }
    else {
        str = rb_str_dup(str);
        OBJ_FREEZE(str);
    }
    str = rb_fstring(str);
    type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
    if (type < 0) type = ID_JUNK;
    return dsymbol_alloc(rb_cSymbol, str, enc, type);
#else
    id = intern_str(str, 0);
    return ID2SYM(id);
#endif
}

#lengthInteger #sizeInteger

Returns the character length of str.


1857
1858
1859
1860
1861
# File 'string.c', line 1857

VALUE
rb_str_length(VALUE str)
{
    return LONG2NUM(str_strlen(str, NULL));
}

#lines(separator = $/, chomp: false) ⇒ Array

Returns an array of lines in str split using the supplied record separator ($/ by default). This is a shorthand for str.each_line(separator, getline_args).to_a.

If chomp is true, separator will be removed from the end of each line.

"hello\nworld\n".lines              #=> ["hello\n", "world\n"]
"hello  world".lines(' ')           #=> ["hello ", " ", "world"]
"hello\nworld\n".lines(chomp: true) #=> ["hello", "world"]

If a block is given, which is a deprecated form, works the same as each_line.


8378
8379
8380
8381
8382
8383
# File 'string.c', line 8378

static VALUE
rb_str_lines(int argc, VALUE *argv, VALUE str)
{
    VALUE ary = WANTARRAY("lines", 0);
    return rb_str_enumerate_lines(argc, argv, str, ary);
}

#ljust(integer, padstr = ' ') ⇒ String

If integer is greater than the length of str, returns a new String of length integer with str left justified and padded with padstr; otherwise, returns str.

"hello".ljust(4)            #=> "hello"
"hello".ljust(20)           #=> "hello               "
"hello".ljust(20, '1234')   #=> "hello123412341234123"

9719
9720
9721
9722
9723
# File 'string.c', line 9719

static VALUE
rb_str_ljust(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'l');
}

#lstripString

Returns a copy of the receiver with leading whitespace removed. See also String#rstrip and String#strip.

Refer to String#strip for the definition of whitespace.

"  hello  ".lstrip   #=> "hello  "
"hello".lstrip       #=> "hello"

9077
9078
9079
9080
9081
9082
9083
9084
9085
9086
# File 'string.c', line 9077

static VALUE
rb_str_lstrip(VALUE str)
{
    char *start;
    long len, loffset;
    RSTRING_GETMEM(str, start, len);
    loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
    if (loffset <= 0) return rb_str_dup(str);
    return rb_str_subseq(str, loffset, len - loffset);
}

#lstrip!self?

Removes leading whitespace from the receiver. Returns the altered receiver, or nil if no change was made. See also String#rstrip! and String#strip!.

Refer to String#strip for the definition of whitespace.

"  hello  ".lstrip!  #=> "hello  "
"hello  ".lstrip!    #=> nil
"hello".lstrip!      #=> nil

9039
9040
9041
9042
9043
9044
9045
9046
9047
9048
9049
9050
9051
9052
9053
9054
9055
9056
9057
9058
9059
9060
9061
# File 'string.c', line 9039

static VALUE
rb_str_lstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *start, *s;
    long olen, loffset;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    if (loffset > 0) {
  long len = olen-loffset;
  s = start + loffset;
  memmove(start, s, len);
  STR_SET_LEN(str, len);
#if !SHARABLE_MIDDLE_SUBSTRING
  TERM_FILL(start+len, rb_enc_mbminlen(enc));
#endif
  return str;
    }
    return Qnil;
}

#match(pattern) ⇒ MatchData? #match(pattern, pos) ⇒ MatchData?

Converts pattern to a Regexp (if it isn't already one), then invokes its match method on str. If the second parameter is present, it specifies the position in the string to begin the search.

'hello'.match('(.)\1')      #=> #<MatchData "ll" 1:"l">
'hello'.match('(.)\1')[0]   #=> "ll"
'hello'.match(/(.)\1/)[0]   #=> "ll"
'hello'.match(/(.)\1/, 3)   #=> nil
'hello'.match('xx')         #=> nil

If a block is given, invoke the block with MatchData if match succeed, so that you can write

str.match(pat) {|m| ...}

instead of

if m = str.match(pat)
  ...
end

The return value is a value from block execution in this case.


3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
# File 'string.c', line 3853

static VALUE
rb_str_match_m(int argc, VALUE *argv, VALUE str)
{
    VALUE re, result;
    if (argc < 1)
  rb_check_arity(argc, 1, 2);
    re = argv[0];
    argv[0] = str;
    result = rb_funcallv(get_pat(re), rb_intern("match"), argc, argv);
    if (!NIL_P(result) && rb_block_given_p()) {
  return rb_yield(result);
    }
    return result;
}

#match?(pattern) ⇒ Boolean #match?(pattern, pos) ⇒ Boolean

Converts pattern to a Regexp (if it isn't already one), then returns a true or false indicates whether the regexp is matched str or not without updating $~ and other related variables. If the second parameter is present, it specifies the position in the string to begin the search.

"Ruby".match?(/R.../)    #=> true
"Ruby".match?(/R.../, 1) #=> false
"Ruby".match?(/P.../)    #=> false
$&                       #=> nil

3885
3886
3887
3888
3889
3890
3891
3892
# File 'string.c', line 3885

static VALUE
rb_str_match_m_p(int argc, VALUE *argv, VALUE str)
{
    VALUE re;
    rb_check_arity(argc, 1, 2);
    re = get_pat(argv[0]);
    return rb_reg_match_p(re, str, argc > 1 ? NUM2LONG(argv[1]) : 0);
}

#succString #nextString

Returns the successor to str. The successor is calculated by incrementing characters starting from the rightmost alphanumeric (or the rightmost character if there are no alphanumerics) in the string. Incrementing a digit always results in another digit, and incrementing a letter results in another letter of the same case. Incrementing nonalphanumerics uses the underlying character set's collating sequence.

If the increment generates a “carry,'' the character to the left of it is incremented. This process repeats until there is no carry, adding an additional character if necessary.

"abcd".succ        #=> "abce"
"THX1138".succ     #=> "THX1139"
"<<koala>>".succ   #=> "<<koalb>>"
"1999zzz".succ     #=> "2000aaa"
"ZZZ9999".succ     #=> "AAAA0000"
"***".succ         #=> "**+"

4104
4105
4106
4107
4108
4109
4110
4111
# File 'string.c', line 4104

VALUE
rb_str_succ(VALUE orig)
{
    VALUE str;
    str = rb_str_new_with_class(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));
    rb_enc_cr_str_copy_for_substr(str, orig);
    return str_succ(str);
}

#succ!String #next!String

Equivalent to String#succ, but modifies the receiver in place.


4209
4210
4211
4212
4213
4214
4215
# File 'string.c', line 4209

static VALUE
rb_str_succ_bang(VALUE str)
{
    rb_str_modify(str);
    str_succ(str);
    return str;
}

#octInteger

Treats leading characters of str as a string of octal digits (with an optional sign) and returns the corresponding number. Returns 0 if the conversion fails.

"123".oct       #=> 83
"-377".oct      #=> -255
"bad".oct       #=> 0
"0377bad".oct   #=> 255

If str starts with 0, radix indicators are honored. See Kernel#Integer.


9408
9409
9410
9411
9412
# File 'string.c', line 9408

static VALUE
rb_str_oct(VALUE str)
{
    return rb_str_to_inum(str, -8, FALSE);
}

#ordInteger

Returns the Integer ordinal of a one-character string.

"a".ord         #=> 97

9541
9542
9543
9544
9545
9546
9547
9548
# File 'string.c', line 9541

VALUE
rb_str_ord(VALUE s)
{
    unsigned int c;

    c = rb_enc_codepoint(RSTRING_PTR(s), RSTRING_END(s), STR_ENC_GET(s));
    return UINT2NUM(c);
}

#partition(sep) ⇒ Array #partition(regexp) ⇒ Array

Searches sep or pattern (regexp) in the string and returns the part before it, the match, and the part after it. If it is not found, returns two empty strings and str.

"hello".partition("l")         #=> ["he", "l", "lo"]
"hello".partition("x")         #=> ["hello", "", ""]
"hello".partition(/.l/)        #=> ["h", "el", "lo"]

9780
9781
9782
9783
9784
9785
9786
9787
9788
9789
9790
9791
9792
9793
9794
9795
9796
9797
9798
9799
9800
9801
9802
9803
# File 'string.c', line 9780

static VALUE
rb_str_partition(VALUE str, VALUE sep)
{
    long pos;

    sep = get_pat_quoted(sep, 0);
    if (RB_TYPE_P(sep, T_REGEXP)) {
  pos = rb_reg_search(sep, str, 0, 0);
  if (pos < 0) {
    failed:
      return rb_ary_new3(3, rb_str_dup(str), str_new_empty(str), str_new_empty(str));
  }
  sep = rb_str_subpat(str, sep, INT2FIX(0));
  if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
    }
    else {
  pos = rb_str_index(str, sep, 0);
  if (pos < 0) goto failed;
    }
    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
              sep,
              rb_str_subseq(str, pos+RSTRING_LEN(sep),
               RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
}

#prepend(other_str1, other_str2, ...) ⇒ String

Prepend—Prepend the given strings to str.

a = "!"
a.prepend("hello ", "world") #=> "hello world!"
a                            #=> "hello world!"

See also String#concat.


3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
# File 'string.c', line 3156

static VALUE
rb_str_prepend_multi(int argc, VALUE *argv, VALUE str)
{
    str_modifiable(str);

    if (argc == 1) {
  rb_str_update(str, 0L, 0L, argv[0]);
    }
    else if (argc > 1) {
  int i;
  VALUE arg_str = rb_str_tmp_new(0);
  rb_enc_copy(arg_str, str);
  for (i = 0; i < argc; i++) {
      rb_str_append(arg_str, argv[i]);
  }
  rb_str_update(str, 0L, 0L, arg_str);
    }

    return str;
}

#replace(other_str) ⇒ String

Replaces the contents of str with the corresponding values in other_str.

s = "hello"         #=> "hello"
s.replace "world"   #=> "world"

5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
# File 'string.c', line 5377

VALUE
rb_str_replace(VALUE str, VALUE str2)
{
    str_modifiable(str);
    if (str == str2) return str;

    StringValue(str2);
    str_discard(str);
    return str_replace(str, str2);
}

#reverseString

Returns a new string with the characters from str in reverse order.

"stressed".reverse   #=> "desserts"

5632
5633
5634
5635
5636
5637
5638
5639
5640
5641
5642
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668
5669
5670
5671
5672
5673
5674
5675
5676
5677
5678
5679
5680
# File 'string.c', line 5632

static VALUE
rb_str_reverse(VALUE str)
{
    rb_encoding *enc;
    VALUE rev;
    char *s, *e, *p;
    int cr;

    if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
    enc = STR_ENC_GET(str);
    rev = rb_str_new_with_class(str, 0, RSTRING_LEN(str));
    s = RSTRING_PTR(str); e = RSTRING_END(str);
    p = RSTRING_END(rev);
    cr = ENC_CODERANGE(str);

    if (RSTRING_LEN(str) > 1) {
  if (single_byte_optimizable(str)) {
      while (s < e) {
    *--p = *s++;
      }
  }
  else if (cr == ENC_CODERANGE_VALID) {
      while (s < e) {
    int clen = rb_enc_fast_mbclen(s, e, enc);

    p -= clen;
    memcpy(p, s, clen);
    s += clen;
      }
  }
  else {
      cr = rb_enc_asciicompat(enc) ?
    ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
      while (s < e) {
    int clen = rb_enc_mbclen(s, e, enc);

    if (clen > 1 || (*s & 0x80)) cr = ENC_CODERANGE_UNKNOWN;
    p -= clen;
    memcpy(p, s, clen);
    s += clen;
      }
  }
    }
    STR_SET_LEN(rev, RSTRING_LEN(str));
    str_enc_copy(rev, str);
    ENC_CODERANGE_SET(rev, cr);

    return rev;
}

#reverse!String

Reverses str in place.


5690
5691
5692
5693
5694
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710
5711
5712
5713
5714
# File 'string.c', line 5690

static VALUE
rb_str_reverse_bang(VALUE str)
{
    if (RSTRING_LEN(str) > 1) {
  if (single_byte_optimizable(str)) {
      char *s, *e, c;

      str_modify_keep_cr(str);
      s = RSTRING_PTR(str);
      e = RSTRING_END(str) - 1;
      while (s < e) {
    c = *s;
    *s++ = *e;
    *e-- = c;
      }
  }
  else {
      str_shared_replace(str, rb_str_reverse(str));
  }
    }
    else {
  str_modify_keep_cr(str);
    }
    return str;
}

#rindex(substring[, integer]) ⇒ Integer? #rindex(regexp[, integer]) ⇒ Integer?

Returns the index of the last occurrence of the given substring or pattern (regexp) in str. Returns nil if not found. If the second parameter is present, it specifies the position in the string to end the search—characters beyond this point will not be considered.

"hello".rindex('e')             #=> 1
"hello".rindex('l')             #=> 3
"hello".rindex('a')             #=> nil
"hello".rindex(?e)              #=> 1
"hello".rindex(/[aeiou]/, -2)   #=> 1

3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
# File 'string.c', line 3727

static VALUE
rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
{
    VALUE sub;
    VALUE vpos;
    rb_encoding *enc = STR_ENC_GET(str);
    long pos, len = str_strlen(str, enc); /* str's enc */

    if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
  pos = NUM2LONG(vpos);
  if (pos < 0) {
      pos += len;
      if (pos < 0) {
    if (RB_TYPE_P(sub, T_REGEXP)) {
        rb_backref_set(Qnil);
    }
    return Qnil;
      }
  }
  if (pos > len) pos = len;
    }
    else {
  pos = len;
    }

    if (SPECIAL_CONST_P(sub)) goto generic;
    switch (BUILTIN_TYPE(sub)) {
      case T_REGEXP:
  /* enc = rb_get_check(str, sub); */
  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
       enc, single_byte_optimizable(str));

  pos = rb_reg_search(sub, str, pos, 1);
  pos = rb_str_sublen(str, pos);
  if (pos >= 0) return LONG2NUM(pos);
  break;

      generic:
      default: {
  VALUE tmp;

  tmp = rb_check_string_type(sub);
  if (NIL_P(tmp)) {
      rb_raise(rb_eTypeError, "type mismatch: %s given",
         rb_obj_classname(sub));
  }
  sub = tmp;
      }
  /* fall through */
      case T_STRING:
  pos = rb_str_rindex(str, sub, pos);
  if (pos >= 0) return LONG2NUM(pos);
  break;
    }
    return Qnil;
}

#rjust(integer, padstr = ' ') ⇒ String

If integer is greater than the length of str, returns a new String of length integer with str right justified and padded with padstr; otherwise, returns str.

"hello".rjust(4)            #=> "hello"
"hello".rjust(20)           #=> "               hello"
"hello".rjust(20, '1234')   #=> "123412341234123hello"

9739
9740
9741
9742
9743
# File 'string.c', line 9739

static VALUE
rb_str_rjust(int argc, VALUE *argv, VALUE str)
{
    return rb_str_justify(argc, argv, str, 'r');
}

#rpartition(sep) ⇒ Array #rpartition(regexp) ⇒ Array

Searches sep or pattern (regexp) in the string from the end of the string, and returns the part before it, the match, and the part after it. If it is not found, returns two empty strings and str.

"hello".rpartition("l")         #=> ["hel", "l", "o"]
"hello".rpartition("x")         #=> ["", "", "hello"]
"hello".rpartition(/.l/)        #=> ["he", "ll", "o"]

9820
9821
9822
9823
9824
9825
9826
9827
9828
9829
9830
9831
9832
9833
9834
9835
9836
9837
9838
9839
9840
9841
9842
9843
9844
9845
9846
9847
9848
9849
9850
9851
9852
9853
9854
9855
# File 'string.c', line 9820

static VALUE
rb_str_rpartition(VALUE str, VALUE sep)
{
    long pos = RSTRING_LEN(str);
    int regex = FALSE;

    if (RB_TYPE_P(sep, T_REGEXP)) {
  pos = rb_reg_search(sep, str, pos, 1);
  regex = TRUE;
    }
    else {
  VALUE tmp;

  tmp = rb_check_string_type(sep);
  if (NIL_P(tmp)) {
      rb_raise(rb_eTypeError, "type mismatch: %s given",
         rb_obj_classname(sep));
  }
  sep = tmp;
  pos = rb_str_sublen(str, pos);
  pos = rb_str_rindex(str, sep, pos);
    }
    if (pos < 0) {
       return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), rb_str_dup(str));
    }
    if (regex) {
  sep = rb_reg_nth_match(0, rb_backref_get());
    }
    else {
  pos = rb_str_offset(str, pos);
    }
    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
              sep,
              rb_str_subseq(str, pos+RSTRING_LEN(sep),
          RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
}

#rstripString

Returns a copy of the receiver with trailing whitespace removed. See also String#lstrip and String#strip.

Refer to String#strip for the definition of whitespace.

"  hello  ".rstrip   #=> "  hello"
"hello".rstrip       #=> "hello"

9166
9167
9168
9169
9170
9171
9172
9173
9174
9175
9176
9177
9178
9179
# File 'string.c', line 9166

static VALUE
rb_str_rstrip(VALUE str)
{
    rb_encoding *enc;
    char *start;
    long olen, roffset;

    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    roffset = rstrip_offset(str, start, start+olen, enc);

    if (roffset <= 0) return rb_str_dup(str);
    return rb_str_subseq(str, 0, olen-roffset);
}

#rstrip!self?

Removes trailing whitespace from the receiver. Returns the altered receiver, or nil if no change was made. See also String#lstrip! and String#strip!.

Refer to String#strip for the definition of whitespace.

"  hello  ".rstrip!  #=> "  hello"
"  hello".rstrip!    #=> nil
"hello".rstrip!      #=> nil

9129
9130
9131
9132
9133
9134
9135
9136
9137
9138
9139
9140
9141
9142
9143
9144
9145
9146
9147
9148
9149
9150
# File 'string.c', line 9129

static VALUE
rb_str_rstrip_bang(VALUE str)
{
    rb_encoding *enc;
    char *start;
    long olen, roffset;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    roffset = rstrip_offset(str, start, start+olen, enc);
    if (roffset > 0) {
  long len = olen - roffset;

  STR_SET_LEN(str, len);
#if !SHARABLE_MIDDLE_SUBSTRING
  TERM_FILL(start+len, rb_enc_mbminlen(enc));
#endif
  return str;
    }
    return Qnil;
}

#scan(pattern) ⇒ Array #scan(pattern) {|match, ...| ... } ⇒ String

Both forms iterate through str, matching the pattern (which may be a Regexp or a String). For each match, a result is generated and either added to the result array or passed to the block. If the pattern contains no groups, each individual result consists of the matched string, $&. If the pattern contains groups, each individual result is itself an array containing one entry per group.

a = "cruel world"
a.scan(/\w+/)        #=> ["cruel", "world"]
a.scan(/.../)        #=> ["cru", "el ", "wor"]
a.scan(/(...)/)      #=> [["cru"], ["el "], ["wor"]]
a.scan(/(..)(..)/)   #=> [["cr", "ue"], ["l ", "wo"]]

And the block form:

a.scan(/\w+/) {|w| print "<<#{w}>> " }
print "\n"
a.scan(/(.)(.)/) {|x,y| print y, x }
print "\n"

produces:

"rceu lowlr\n">> "">>

Overloads:

  • #scan(pattern) {|match, ...| ... } ⇒ String

    Yields:


9336
9337
9338
9339
9340
9341
9342
9343
9344
9345
9346
9347
9348
9349
9350
9351
9352
9353
9354
9355
9356
9357
9358
9359
9360
9361
9362
9363
9364
9365
9366
9367
# File 'string.c', line 9336

static VALUE
rb_str_scan(VALUE str, VALUE pat)
{
    VALUE result;
    long start = 0;
    long last = -1, prev = 0;
    char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);

    pat = get_pat_quoted(pat, 1);
    mustnot_broken(str);
    if (!rb_block_given_p()) {
  VALUE ary = rb_ary_new();

  while (!NIL_P(result = scan_once(str, pat, &start, 0))) {
      last = prev;
      prev = start;
      rb_ary_push(ary, result);
  }
  if (last >= 0) rb_pat_search(pat, str, last, 1);
  else rb_backref_set(Qnil);
  return ary;
    }

    while (!NIL_P(result = scan_once(str, pat, &start, 1))) {
  last = prev;
  prev = start;
  rb_yield(result);
  str_mod_check(str, p, len);
    }
    if (last >= 0) rb_pat_search(pat, str, last, 1);
    return str;
}

#scrubString #scrub(repl) ⇒ String #scrub {|bytes| ... } ⇒ String

If the string is invalid byte sequence then replace invalid bytes with given replacement character, else returns self. If block is given, replace invalid bytes with returned value of the block.

"abc\u3042\x81".scrub #=> "abc\u3042\uFFFD"
"abc\u3042\x81".scrub("*") #=> "abc\u3042*"
"abc\u3042\xE3\x80".scrub{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"

Overloads:


10537
10538
10539
10540
10541
10542
10543
# File 'string.c', line 10537

static VALUE
str_scrub(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
    VALUE new = rb_str_scrub(str, repl);
    return NIL_P(new) ? rb_str_dup(str): new;
}

#scrub!String #scrub!(repl) ⇒ String #scrub! {|bytes| ... } ⇒ String

If the string is invalid byte sequence then replace invalid bytes with given replacement character, else returns self. If block is given, replace invalid bytes with returned value of the block.

"abc\u3042\x81".scrub! #=> "abc\u3042\uFFFD"
"abc\u3042\x81".scrub!("*") #=> "abc\u3042*"
"abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"

Overloads:


10559
10560
10561
10562
10563
10564
10565
10566
# File 'string.c', line 10559

static VALUE
str_scrub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
    VALUE new = rb_str_scrub(str, repl);
    if (!NIL_P(new)) rb_str_replace(str, new);
    return str;
}

#setbyte(index, integer) ⇒ Integer

modifies the indexth byte as integer.


5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
5485
5486
5487
5488
5489
5490
5491
5492
5493
5494
5495
5496
5497
5498
5499
5500
5501
5502
5503
5504
5505
5506
5507
# File 'string.c', line 5453

static VALUE
rb_str_setbyte(VALUE str, VALUE index, VALUE value)
{
    long pos = NUM2LONG(index);
    long len = RSTRING_LEN(str);
    char *head, *left = 0;
    unsigned char *ptr;
    rb_encoding *enc;
    int cr = ENC_CODERANGE_UNKNOWN, width, nlen;

    if (pos < -len || len <= pos)
        rb_raise(rb_eIndexError, "index %ld out of string", pos);
    if (pos < 0)
        pos += len;

    VALUE v = rb_to_int(value);
    VALUE w = rb_int_and(v, INT2FIX(0xff));
    unsigned char byte = NUM2INT(w) & 0xFF;

    if (!str_independent(str))
  str_make_independent(str);
    enc = STR_ENC_GET(str);
    head = RSTRING_PTR(str);
    ptr = (unsigned char *)&head[pos];
    if (!STR_EMBED_P(str)) {
  cr = ENC_CODERANGE(str);
  switch (cr) {
    case ENC_CODERANGE_7BIT:
            left = (char *)ptr;
      *ptr = byte;
      if (ISASCII(byte)) goto end;
      nlen = rb_enc_precise_mbclen(left, head+len, enc);
      if (!MBCLEN_CHARFOUND_P(nlen))
    ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
      else
    ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
      goto end;
    case ENC_CODERANGE_VALID:
      left = rb_enc_left_char_head(head, ptr, head+len, enc);
      width = rb_enc_precise_mbclen(left, head+len, enc);
      *ptr = byte;
      nlen = rb_enc_precise_mbclen(left, head+len, enc);
      if (!MBCLEN_CHARFOUND_P(nlen))
    ENC_CODERANGE_SET(str, ENC_CODERANGE_BROKEN);
      else if (MBCLEN_CHARFOUND_LEN(nlen) != width || ISASCII(byte))
    ENC_CODERANGE_CLEAR(str);
      goto end;
  }
    }
    ENC_CODERANGE_CLEAR(str);
    *ptr = byte;

  end:
    return value;
}

#lengthInteger #sizeInteger

Returns the character length of str.


1857
1858
1859
1860
1861
# File 'string.c', line 1857

VALUE
rb_str_length(VALUE str)
{
    return LONG2NUM(str_strlen(str, NULL));
}

#[](index) ⇒ String? #[](start, length) ⇒ String? #[](range) ⇒ String? #[](regexp) ⇒ String? #[](regexp, capture) ⇒ String? #[](match_str) ⇒ String? #slice(index) ⇒ String? #slice(start, length) ⇒ String? #slice(range) ⇒ String? #slice(regexp) ⇒ String? #slice(regexp, capture) ⇒ String? #slice(match_str) ⇒ String?

Element Reference — If passed a single index, returns a substring of one character at that index. If passed a start index and a length, returns a substring containing length characters starting at the start index. If passed a range, its beginning and end are interpreted as offsets delimiting the substring to be returned.

In these three cases, if an index is negative, it is counted from the end of the string. For the start and range cases the starting index is just before a character and an index matching the string's size. Additionally, an empty string is returned when the starting index for a character range is at the end of the string.

Returns nil if the initial index falls outside the string or the length is negative.

If a Regexp is supplied, the matching portion of the string is returned. If a capture follows the regular expression, which may be a capture group index or name, follows the regular expression that component of the MatchData is returned instead.

If a match_str is given, that string is returned if it occurs in the string.

Returns nil if the regular expression does not match or the match string cannot be found.

a = "hello there"

a[1]                   #=> "e"
a[2, 3]                #=> "llo"
a[2..3]                #=> "ll"

a[-3, 2]               #=> "er"
a[7..-2]               #=> "her"
a[-4..-2]              #=> "her"
a[-2..-4]              #=> ""

a[11, 0]               #=> ""
a[11]                  #=> nil
a[12, 0]               #=> nil
a[12..-1]              #=> nil

a[/[aeiou](.)\1/]      #=> "ell"
a[/[aeiou](.)\1/, 0]   #=> "ell"
a[/[aeiou](.)\1/, 1]   #=> "l"
a[/[aeiou](.)\1/, 2]   #=> nil

a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "non_vowel"] #=> "l"
a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "vowel"]     #=> "e"

a["lo"]                #=> "lo"
a["bye"]               #=> nil

4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
# File 'string.c', line 4570

static VALUE
rb_str_aref_m(int argc, VALUE *argv, VALUE str)
{
    if (argc == 2) {
  if (RB_TYPE_P(argv[0], T_REGEXP)) {
      return rb_str_subpat(str, argv[0], argv[1]);
  }
  else {
      long beg = NUM2LONG(argv[0]);
      long len = NUM2LONG(argv[1]);
      return rb_str_substr(str, beg, len);
  }
    }
    rb_check_arity(argc, 1, 2);
    return rb_str_aref(str, argv[0]);
}

#slice!(integer) ⇒ String? #slice!(integer, integer) ⇒ String? #slice!(range) ⇒ String? #slice!(regexp) ⇒ String? #slice!(other_str) ⇒ String?

Deletes the specified portion from str, and returns the portion deleted.

string = "this is a string"
string.slice!(2)        #=> "i"
string.slice!(3..6)     #=> " is "
string.slice!(/s.*t/)   #=> "sa st"
string.slice!("r")      #=> "r"
string                  #=> "thing"

4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
# File 'string.c', line 4876

static VALUE
rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE result;
    VALUE buf[3];
    int i;

    rb_check_arity(argc, 1, 2);
    for (i=0; i<argc; i++) {
  buf[i] = argv[i];
    }
    str_modify_keep_cr(str);
    result = rb_str_aref_m(argc, buf, str);
    if (!NIL_P(result)) {
  buf[i] = rb_str_new(0,0);
  rb_str_aset_m(argc+1, buf, str);
    }
    return result;
}

#split(pattern = nil, [limit]) ⇒ Array #split(pattern = nil, [limit]) {|sub| ... } ⇒ String

Divides str into substrings based on a delimiter, returning an array of these substrings.

If pattern is a String, then its contents are used as the delimiter when splitting str. If pattern is a single space, str is split on whitespace, with leading and trailing whitespace and runs of contiguous whitespace characters ignored.

If pattern is a Regexp, str is divided where the pattern matches. Whenever the pattern matches a zero-length string, str is split into individual characters. If pattern contains groups, the respective matches will be returned in the array as well.

If pattern is nil, the value of $; is used. If $; is nil (which is the default), str is split on whitespace as if ' ' were specified.

If the limit parameter is omitted, trailing null fields are suppressed. If limit is a positive number, at most that number of split substrings will be returned (captured groups will be returned as well, but are not counted towards the limit). If limit is 1, the entire string is returned as the only entry in an array. If negative, there is no limit to the number of fields returned, and trailing null fields are not suppressed.

When the input str is empty an empty Array is returned as the string is considered to have no fields to split.

" now's  the time ".split       #=> ["now's", "the", "time"]
" now's  the time ".split(' ')  #=> ["now's", "the", "time"]
" now's  the time".split(/ /)   #=> ["", "now's", "", "the", "time"]
"1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
"hello".split(//)               #=> ["h", "e", "l", "l", "o"]
"hello".split(//, 3)            #=> ["h", "e", "llo"]
"hi mom".split(%r{\s*})         #=> ["h", "i", "m", "o", "m"]

"mellow yellow".split("ello")   #=> ["m", "w y", "w"]
"1,2,,3,4,,".split(',')         #=> ["1", "2", "", "3", "4"]
"1,2,,3,4,,".split(',', 4)      #=> ["1", "2", "", "3,4,,"]
"1,2,,3,4,,".split(',', -4)     #=> ["1", "2", "", "3", "4", "", ""]

"1:2:3".split(/(:)()()/, 2)     #=> ["1", ":", "", "", "2:3"]

"".split(',', -1)               #=> []

If a block is given, invoke the block with each split substring.

Overloads:

  • #split(pattern = nil, [limit]) {|sub| ... } ⇒ String

    Yields:


7909
7910
7911
7912
7913
7914
7915
7916
7917
7918
7919
7920
7921
7922
7923
7924
7925
7926
7927
7928
7929
7930
7931
7932
7933
7934
7935
7936
7937
7938
7939
7940
7941
7942
7943
7944
7945
7946
7947
7948
7949
7950
7951
7952
7953
7954
7955
7956
7957
7958
7959
7960
7961
7962
7963
7964
7965
7966
7967
7968
7969
7970
7971
7972
7973
7974
7975
7976
7977
7978
7979
7980
7981
7982
7983
7984
7985
7986
7987
7988
7989
7990
7991
7992
7993
7994
7995
7996
7997
7998
7999
8000
8001
8002
8003
8004
8005
8006
8007
8008
8009
8010
8011
8012
8013
8014
8015
8016
8017
8018
8019
8020
8021
8022
8023
8024
8025
8026
8027
8028
8029
8030
8031
8032
8033
8034
8035
8036
8037
8038
8039
8040
8041
8042
8043
8044
8045
8046
8047
8048
8049
8050
8051
8052
8053
8054
8055
8056
8057
8058
8059
8060
8061
8062
8063
8064
8065
8066
8067
8068
8069
8070
8071
8072
8073
8074
8075
8076
8077
8078
8079
8080
8081
8082
8083
8084
8085
8086
8087
8088
8089
8090
8091
8092
8093
8094
8095
8096
8097
8098
8099
8100
8101
8102
8103
8104
8105
8106
8107
8108
8109
8110
8111
8112
8113
8114
8115
8116
8117
8118
8119
8120
8121
8122
8123
8124
8125
8126
8127
8128
# File 'string.c', line 7909

static VALUE
rb_str_split_m(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    VALUE spat;
    VALUE limit;
    enum {awk, string, regexp, chars} split_type;
    long beg, end, i = 0, empty_count = -1;
    int lim = 0;
    VALUE result, tmp;

    result = rb_block_given_p() ? Qfalse : Qnil;
    if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
  lim = NUM2INT(limit);
  if (lim <= 0) limit = Qnil;
  else if (lim == 1) {
      if (RSTRING_LEN(str) == 0)
    return result ? rb_ary_new2(0) : str;
      tmp = rb_str_dup(str);
      if (!result) {
    rb_yield(tmp);
    return str;
      }
      return rb_ary_new3(1, tmp);
  }
  i = 1;
    }
    if (NIL_P(limit) && !lim) empty_count = 0;

    enc = STR_ENC_GET(str);
    split_type = regexp;
    if (!NIL_P(spat)) {
  spat = get_pat_quoted(spat, 0);
    }
    else if (NIL_P(spat = rb_fs)) {
  split_type = awk;
    }
    else if (!(spat = rb_fs_check(spat))) {
  rb_raise(rb_eTypeError, "value of $; must be String or Regexp");
    }
    else {
        rb_warn("$; is set to non-nil value");
    }
    if (split_type != awk) {
  if (BUILTIN_TYPE(spat) == T_STRING) {
      rb_encoding *enc2 = STR_ENC_GET(spat);

      mustnot_broken(spat);
      split_type = string;
      if (RSTRING_LEN(spat) == 0) {
    /* Special case - split into chars */
                split_type = chars;
      }
      else if (rb_enc_asciicompat(enc2) == 1) {
    if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
        split_type = awk;
    }
      }
      else {
    int l;
    if (rb_enc_ascget(RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
        RSTRING_LEN(spat) == l) {
        split_type = awk;
    }
      }
  }
    }

#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))

    if (result) result = rb_ary_new();
    beg = 0;
    char *ptr = RSTRING_PTR(str);
    char *eptr = RSTRING_END(str);
    if (split_type == awk) {
  char *bptr = ptr;
  int skip = 1;
  unsigned int c;

  end = beg;
  if (is_ascii_string(str)) {
      while (ptr < eptr) {
    c = (unsigned char)*ptr++;
    if (skip) {
        if (ascii_isspace(c)) {
      beg = ptr - bptr;
        }
        else {
      end = ptr - bptr;
      skip = 0;
      if (!NIL_P(limit) && lim <= i) break;
        }
    }
    else if (ascii_isspace(c)) {
        SPLIT_STR(beg, end-beg);
        skip = 1;
        beg = ptr - bptr;
        if (!NIL_P(limit)) ++i;
    }
    else {
        end = ptr - bptr;
    }
      }
  }
  else {
      while (ptr < eptr) {
    int n;

    c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
    ptr += n;
    if (skip) {
        if (rb_isspace(c)) {
      beg = ptr - bptr;
        }
        else {
      end = ptr - bptr;
      skip = 0;
      if (!NIL_P(limit) && lim <= i) break;
        }
    }
    else if (rb_isspace(c)) {
        SPLIT_STR(beg, end-beg);
        skip = 1;
        beg = ptr - bptr;
        if (!NIL_P(limit)) ++i;
    }
    else {
        end = ptr - bptr;
    }
      }
  }
    }
    else if (split_type == string) {
  char *str_start = ptr;
  char *substr_start = ptr;
  char *sptr = RSTRING_PTR(spat);
  long slen = RSTRING_LEN(spat);

  mustnot_broken(str);
  enc = rb_enc_check(str, spat);
  while (ptr < eptr &&
         (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
      /* Check we are at the start of a char */
      char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
      if (t != ptr + end) {
    ptr = t;
    continue;
      }
      SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
      ptr += end + slen;
      substr_start = ptr;
      if (!NIL_P(limit) && lim <= ++i) break;
  }
  beg = ptr - str_start;
    }
    else if (split_type == chars) {
        char *str_start = ptr;
        int n;

        mustnot_broken(str);
        enc = rb_enc_get(str);
        while (ptr < eptr &&
               (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
            SPLIT_STR(ptr - str_start, n);
            ptr += n;
            if (!NIL_P(limit) && lim <= ++i) break;
        }
        beg = ptr - str_start;
    }
    else {
  long len = RSTRING_LEN(str);
  long start = beg;
  long idx;
  int last_null = 0;
  struct re_registers *regs;
        VALUE match = 0;

        for (; (end = rb_reg_search(spat, str, start, 0)) >= 0;
             (match ? (rb_match_unbusy(match), rb_backref_set(match)) : (void)0)) {
            match = rb_backref_get();
            if (!result) rb_match_busy(match);
            regs = RMATCH_REGS(match);
      if (start == end && BEG(0) == END(0)) {
    if (!ptr) {
        SPLIT_STR(0, 0);
        break;
    }
    else if (last_null == 1) {
                    SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
        beg = start;
    }
    else {
                    if (start == len)
                        start++;
                    else
                        start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
        last_null = 1;
        continue;
    }
      }
      else {
    SPLIT_STR(beg, end-beg);
    beg = start = END(0);
      }
      last_null = 0;

      for (idx=1; idx < regs->num_regs; idx++) {
    if (BEG(idx) == -1) continue;
    SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
      }
      if (!NIL_P(limit) && lim <= ++i) break;
  }
        if (match) rb_match_unbusy(match);
    }
    if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
  SPLIT_STR(beg, RSTRING_LEN(str)-beg);
    }

    return result ? result : str;
}

#squeeze([other_str]) ⇒ String

Builds a set of characters from the other_str parameter(s) using the procedure described for String#count. Returns a new string where runs of the same character that occur in this set are replaced by a single character. If no arguments are given, all runs of identical characters are replaced by a single character.

"yellow moon".squeeze                  #=> "yelow mon"
"  now   is  the".squeeze(" ")         #=> " now is the"
"putters shoot balls".squeeze("m-z")   #=> "puters shot balls"

7651
7652
7653
7654
7655
7656
7657
# File 'string.c', line 7651

static VALUE
rb_str_squeeze(int argc, VALUE *argv, VALUE str)
{
    str = rb_str_dup(str);
    rb_str_squeeze_bang(argc, argv, str);
    return str;
}

#squeeze!([other_str]) ⇒ String?

Squeezes str in place, returning either str, or nil if no changes were made.


7560
7561
7562
7563
7564
7565
7566
7567
7568
7569
7570
7571
7572
7573
7574
7575
7576
7577
7578
7579
7580
7581
7582
7583
7584
7585
7586
7587
7588
7589
7590
7591
7592
7593
7594
7595
7596
7597
7598
7599
7600
7601
7602
7603
7604
7605
7606
7607
7608
7609
7610
7611
7612
7613
7614
7615
7616
7617
7618
7619
7620
7621
7622
7623
7624
7625
7626
7627
7628
7629
7630
7631
7632
7633
# File 'string.c', line 7560

static VALUE
rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
{
    char squeez[TR_TABLE_SIZE];
    rb_encoding *enc = 0;
    VALUE del = 0, nodel = 0;
    unsigned char *s, *send, *t;
    int i, modify = 0;
    int ascompat, singlebyte = single_byte_optimizable(str);
    unsigned int save;

    if (argc == 0) {
  enc = STR_ENC_GET(str);
    }
    else {
  for (i=0; i<argc; i++) {
      VALUE s = argv[i];

      StringValue(s);
      enc = rb_enc_check(str, s);
      if (singlebyte && !single_byte_optimizable(s))
    singlebyte = 0;
      tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
  }
    }

    str_modify_keep_cr(str);
    s = t = (unsigned char *)RSTRING_PTR(str);
    if (!s || RSTRING_LEN(str) == 0) return Qnil;
    send = (unsigned char *)RSTRING_END(str);
    save = -1;
    ascompat = rb_enc_asciicompat(enc);

    if (singlebyte) {
        while (s < send) {
            unsigned int c = *s++;
      if (c != save || (argc > 0 && !squeez[c])) {
          *t++ = save = c;
      }
  }
    }
    else {
  while (s < send) {
      unsigned int c;
      int clen;

            if (ascompat && (c = *s) < 0x80) {
    if (c != save || (argc > 0 && !squeez[c])) {
        *t++ = save = c;
    }
    s++;
      }
      else {
                c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, enc);

    if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
        if (t != s) rb_enc_mbcput(c, t, enc);
        save = c;
        t += clen;
    }
    s += clen;
      }
  }
    }

    TERM_FILL((char *)t, TERM_LEN(str));
    if ((char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
        STR_SET_LEN(str, (char *)t - RSTRING_PTR(str));
  modify = 1;
    }

    if (modify) return str;
    return Qnil;
}

#start_with?([prefixes]) ⇒ Boolean

Returns true if str starts with one of the prefixes given. Each of the prefixes should be a String or a Regexp.

"hello".start_with?("hell")               #=> true
"hello".start_with?(/H/i)                 #=> true

# returns true if one of the prefixes matches.
"hello".start_with?("heaven", "hell")     #=> true
"hello".start_with?("heaven", "paradise") #=> false

9872
9873
9874
9875
9876
9877
9878
9879
9880
9881
9882
9883
9884
9885
9886
9887
9888
9889
9890
9891
9892
# File 'string.c', line 9872

static VALUE
rb_str_start_with(int argc, VALUE *argv, VALUE str)
{
    int i;

    for (i=0; i<argc; i++) {
  VALUE tmp = argv[i];
  if (RB_TYPE_P(tmp, T_REGEXP)) {
      if (rb_reg_start_with_p(tmp, str))
    return Qtrue;
  }
  else {
      StringValue(tmp);
      rb_enc_check(str, tmp);
      if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
      if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
    return Qtrue;
  }
    }
    return Qfalse;
}

#stripString

Returns a copy of the receiver with leading and trailing whitespace removed.

Whitespace is defined as any of the following characters: null, horizontal tab, line feed, vertical tab, form feed, carriage return, space.

"    hello    ".strip   #=> "hello"
"\tgoodbye\r\n".strip   #=> "goodbye"
"\x00\t\n\v\f\r ".strip #=> ""
"hello".strip           #=> "hello"

9239
9240
9241
9242
9243
9244
9245
9246
9247
9248
9249
9250
9251
9252
# File 'string.c', line 9239

static VALUE
rb_str_strip(VALUE str)
{
    char *start;
    long olen, loffset, roffset;
    rb_encoding *enc = STR_ENC_GET(str);

    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    roffset = rstrip_offset(str, start+loffset, start+olen, enc);

    if (loffset <= 0 && roffset <= 0) return rb_str_dup(str);
    return rb_str_subseq(str, loffset, olen-loffset-roffset);
}

#strip!self?

Removes leading and trailing whitespace from the receiver. Returns the altered receiver, or nil if there was no change.

Refer to String#strip for the definition of whitespace.

"  hello  ".strip!  #=> "hello"
"hello".strip!      #=> nil

9195
9196
9197
9198
9199
9200
9201
9202
9203
9204
9205
9206
9207
9208
9209
9210
9211
9212
9213
9214
9215
9216
9217
9218
9219
9220
9221
# File 'string.c', line 9195

static VALUE
rb_str_strip_bang(VALUE str)
{
    char *start;
    long olen, loffset, roffset;
    rb_encoding *enc;

    str_modify_keep_cr(str);
    enc = STR_ENC_GET(str);
    RSTRING_GETMEM(str, start, olen);
    loffset = lstrip_offset(str, start, start+olen, enc);
    roffset = rstrip_offset(str, start+loffset, start+olen, enc);

    if (loffset > 0 || roffset > 0) {
  long len = olen-roffset;
  if (loffset > 0) {
      len -= loffset;
      memmove(start, start + loffset, len);
  }
  STR_SET_LEN(str, len);
#if !SHARABLE_MIDDLE_SUBSTRING
  TERM_FILL(start+len, rb_enc_mbminlen(enc));
#endif
  return str;
    }
    return Qnil;
}

#sub(pattern, replacement) ⇒ String #sub(pattern, hash) ⇒ String #sub(pattern) {|match| ... } ⇒ String

Returns a copy of str with the first occurrence of pattern replaced by the second argument. The pattern is typically a Regexp; if given as a String, any regular expression metacharacters it contains will be interpreted literally, e.g. \d will match a backslash followed by 'd', instead of a digit.

If replacement is a String it will be substituted for the matched text. It may contain back-references to the pattern's capture groups of the form \d, where d is a group number, or \k<n>, where n is a group name. Similarly, \&, \', \`, and + correspond to special variables, $&, $', $`, and $+, respectively. (See regexp.rdoc for details.) \0 is the same as \&. \\ is interpreted as an escape, i.e., a single backslash. Note that, within replacement the special match variables, such as $&, will not refer to the current match.

If the second argument is a Hash, and the matched text is one of its keys, the corresponding value is the replacement string.

In the block form, the current match string is passed in as a parameter, and variables such as $1, $2, $`, $&, and $' will be set appropriately. (See regexp.rdoc for details.) The value returned by the block will be substituted for the match on each call.

"hello".sub(/[aeiou]/, '*')                  #=> "h*llo"
"hello".sub(/([aeiou])/, '<\1>')             #=> "h<e>llo"
"hello".sub(/./) {|s| s.ord.to_s + ' ' }     #=> "104 ello"
"hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*')  #=> "h*e*llo"
'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV)
 #=> "Is /bin/bash your preferred shell?"

Note that a string literal consumes backslashes. (See syntax/literals.rdoc for details about string literals.) Back-references are typically preceded by an additional backslash. For example, if you want to write a back-reference \& in replacement with a double-quoted string literal, you need to write: "..\\&..". If you want to write a non-back-reference string \& in replacement, you need first to escape the backslash to prevent this method from interpreting it as a back-reference, and then you need to escape the backslashes again to prevent a string literal from consuming them: "..\\\\&..". You may want to use the block form to avoid a lot of backslashes.

Overloads:

  • #sub(pattern) {|match| ... } ⇒ String

    Yields:


5146
5147
5148
5149
5150
5151
5152
# File 'string.c', line 5146

static VALUE
rb_str_sub(int argc, VALUE *argv, VALUE str)
{
    str = rb_str_dup(str);
    rb_str_sub_bang(argc, argv, str);
    return str;
}

#sub!(pattern, replacement) ⇒ String? #sub!(pattern) {|match| ... } ⇒ String?

Performs the same substitution as String#sub in-place.

Returns str if a substitution was performed or nil if no substitution was performed.

Overloads:

  • #sub!(pattern) {|match| ... } ⇒ String?

    Yields:


4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
# File 'string.c', line 4981

static VALUE
rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
{
    VALUE pat, repl, hash = Qnil;
    int iter = 0;
    long plen;
    int min_arity = rb_block_given_p() ? 1 : 2;
    long beg;

    rb_check_arity(argc, min_arity, 2);
    if (argc == 1) {
  iter = 1;
    }
    else {
  repl = argv[1];
  hash = rb_check_hash_type(argv[1]);
  if (NIL_P(hash)) {
      StringValue(repl);
  }
    }

    pat = get_pat_quoted(argv[0], 1);

    str_modifiable(str);
    beg = rb_pat_search(pat, str, 0, 1);
    if (beg >= 0) {
  rb_encoding *enc;
  int cr = ENC_CODERANGE(str);
  long beg0, end0;
  VALUE match, match0 = Qnil;
  struct re_registers *regs;
  char *p, *rp;
  long len, rlen;

  match = rb_backref_get();
  regs = RMATCH_REGS(match);
  if (RB_TYPE_P(pat, T_STRING)) {
      beg0 = beg;
      end0 = beg0 + RSTRING_LEN(pat);
      match0 = pat;
  }
  else {
      beg0 = BEG(0);
      end0 = END(0);
      if (iter) match0 = rb_reg_nth_match(0, match);
  }

  if (iter || !NIL_P(hash)) {
      p = RSTRING_PTR(str); len = RSTRING_LEN(str);

            if (iter) {
                repl = rb_obj_as_string(rb_yield(match0));
            }
            else {
                repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
                repl = rb_obj_as_string(repl);
            }
      str_mod_check(str, p, len);
      rb_check_frozen(str);
  }
  else {
      repl = rb_reg_regsub(repl, str, regs, RB_TYPE_P(pat, T_STRING) ? Qnil : pat);
  }

        enc = rb_enc_compatible(str, repl);
        if (!enc) {
            rb_encoding *str_enc = STR_ENC_GET(str);
      p = RSTRING_PTR(str); len = RSTRING_LEN(str);
      if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
    coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
                rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
       rb_enc_name(str_enc),
       rb_enc_name(STR_ENC_GET(repl)));
            }
            enc = STR_ENC_GET(repl);
        }
  rb_str_modify(str);
  rb_enc_associate(str, enc);
  if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
      int cr2 = ENC_CODERANGE(repl);
            if (cr2 == ENC_CODERANGE_BROKEN ||
                (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
                cr = ENC_CODERANGE_UNKNOWN;
            else
                cr = cr2;
  }
  plen = end0 - beg0;
        rlen = RSTRING_LEN(repl);
  len = RSTRING_LEN(str);
  if (rlen > plen) {
      RESIZE_CAPA(str, len + rlen - plen);
  }
  p = RSTRING_PTR(str);
  if (rlen != plen) {
      memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
  }
        rp = RSTRING_PTR(repl);
        memmove(p + beg0, rp, rlen);
  len += rlen - plen;
  STR_SET_LEN(str, len);
  TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
  ENC_CODERANGE_SET(str, cr);

  return str;
    }
    return Qnil;
}

#succString #nextString

Returns the successor to str. The successor is calculated by incrementing characters starting from the rightmost alphanumeric (or the rightmost character if there are no alphanumerics) in the string. Incrementing a digit always results in another digit, and incrementing a letter results in another letter of the same case. Incrementing nonalphanumerics uses the underlying character set's collating sequence.

If the increment generates a “carry,'' the character to the left of it is incremented. This process repeats until there is no carry, adding an additional character if necessary.

"abcd".succ        #=> "abce"
"THX1138".succ     #=> "THX1139"
"<<koala>>".succ   #=> "<<koalb>>"
"1999zzz".succ     #=> "2000aaa"
"ZZZ9999".succ     #=> "AAAA0000"
"***".succ         #=> "**+"

4104
4105
4106
4107
4108
4109
4110
4111
# File 'string.c', line 4104

VALUE
rb_str_succ(VALUE orig)
{
    VALUE str;
    str = rb_str_new_with_class(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));
    rb_enc_cr_str_copy_for_substr(str, orig);
    return str_succ(str);
}

#succ!String #next!String

Equivalent to String#succ, but modifies the receiver in place.


4209
4210
4211
4212
4213
4214
4215
# File 'string.c', line 4209

static VALUE
rb_str_succ_bang(VALUE str)
{
    rb_str_modify(str);
    str_succ(str);
    return str;
}

#sum(n = 16) ⇒ Integer

Returns a basic n-bit checksum of the characters in str, where n is the optional Integer parameter, defaulting to 16. The result is simply the sum of the binary value of each byte in str modulo 2**n - 1. This is not a particularly good checksum.


9560
9561
9562
9563
9564
9565
9566
9567
9568
9569
9570
9571
9572
9573
9574
9575
9576
9577
9578
9579
9580
9581
9582
9583
9584
9585
9586
9587
9588
9589
9590
9591
9592
9593
9594
9595
9596
9597
9598
9599
9600
9601
9602
9603
9604
9605
9606
9607
9608
9609
9610
9611
# File 'string.c', line 9560

static VALUE
rb_str_sum(int argc, VALUE *argv, VALUE str)
{
    int bits = 16;
    char *ptr, *p, *pend;
    long len;
    VALUE sum = INT2FIX(0);
    unsigned long sum0 = 0;

    if (rb_check_arity(argc, 0, 1) && (bits = NUM2INT(argv[0])) < 0) {
        bits = 0;
    }
    ptr = p = RSTRING_PTR(str);
    len = RSTRING_LEN(str);
    pend = p + len;

    while (p < pend) {
        if (FIXNUM_MAX - UCHAR_MAX < sum0) {
            sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
            str_mod_check(str, ptr, len);
            sum0 = 0;
        }
        sum0 += (unsigned char)*p;
        p++;
    }

    if (bits == 0) {
        if (sum0) {
            sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
        }
    }
    else {
        if (sum == INT2FIX(0)) {
            if (bits < (int)sizeof(long)*CHAR_BIT) {
                sum0 &= (((unsigned long)1)<<bits)-1;
            }
            sum = LONG2FIX(sum0);
        }
        else {
            VALUE mod;

            if (sum0) {
                sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
            }

            mod = rb_funcall(INT2FIX(1), idLTLT, 1, INT2FIX(bits));
            mod = rb_funcall(mod, '-', 1, INT2FIX(1));
            sum = rb_funcall(sum, '&', 1, mod);
        }
    }
    return sum;
}

#swapcaseString #swapcase([options]) ⇒ String

Returns a copy of str with uppercase alphabetic characters converted to lowercase and lowercase characters converted to uppercase.

See String#downcase for meaning of options and use with different encodings.

"Hello".swapcase          #=> "hELLO"
"cYbEr_PuNk11".swapcase   #=> "CyBeR_pUnK11"

6982
6983
6984
6985
6986
6987
6988
6989
6990
6991
6992
6993
6994
6995
6996
6997
6998
6999
7000
# File 'string.c', line 6982

static VALUE
rb_str_swapcase(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return str;
    if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new_with_class(str, 0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }
    return ret;
}

#swapcase!String? #swapcase!([options]) ⇒ String?

Equivalent to String#swapcase, but modifies the receiver in place, returning str, or nil if no changes were made.

See String#downcase for meaning of options and use with different encodings.


6949
6950
6951
6952
6953
6954
6955
6956
6957
6958
6959
6960
6961
6962
6963
6964
6965
# File 'string.c', line 6949

static VALUE
rb_str_swapcase_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
  str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#to_cObject

Returns a complex which denotes the string form. The parser ignores leading whitespaces and trailing garbage. Any digit sequences can be separated by an underscore. Returns zero for null or garbage string.

'9'.to_c           #=> (9+0i)
'2.5'.to_c         #=> (2.5+0i)
'2.5/1'.to_c       #=> ((5/2)+0i)
'-3/2'.to_c        #=> ((-3/2)+0i)
'-i'.to_c          #=> (0-1i)
'45i'.to_c         #=> (0+45i)
'3-4i'.to_c        #=> (3-4i)
'-4e2-4e-2i'.to_c  #=> (-400.0-0.04i)
'-0.0-0.0i'.to_c   #=> (-0.0-0.0i)
'1/2+3/4i'.to_c    #=> ((1/2)+(3/4)*i)
'ruby'.to_c        #=> (0+0i)

See Kernel.Complex.


2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
# File 'complex.c', line 2033

static VALUE
string_to_c(VALUE self)
{
    char *s;
    VALUE num;

    rb_must_asciicompat(self);

    s = RSTRING_PTR(self);

    if (s && s[RSTRING_LEN(self)]) {
  rb_str_modify(self);
  s = RSTRING_PTR(self);
  s[RSTRING_LEN(self)] = '\0';
    }

    if (!s)
  s = (char *)"";

    (void)parse_comp(s, 0, &num);

    return num;
}

#to_fFloat

Returns the result of interpreting leading characters in str as a floating point number. Extraneous characters past the end of a valid number are ignored. If there is not a valid number at the start of str, 0.0 is returned. This method never raises an exception.

"123.45e1".to_f        #=> 1234.5
"45.67 degrees".to_f   #=> 45.67
"thx1138".to_f         #=> 0.0

5789
5790
5791
5792
5793
# File 'string.c', line 5789

static VALUE
rb_str_to_f(VALUE str)
{
    return DBL2NUM(rb_str_to_dbl(str, FALSE));
}

#to_i(base = 10) ⇒ Integer

Returns the result of interpreting leading characters in str as an integer base base (between 2 and 36). Extraneous characters past the end of a valid number are ignored. If there is not a valid number at the start of str, 0 is returned. This method never raises an exception when base is valid.

"12345".to_i             #=> 12345
"99 red balloons".to_i   #=> 99
"0a".to_i                #=> 0
"0a".to_i(16)            #=> 10
"hello".to_i             #=> 0
"1100101".to_i(2)        #=> 101
"1100101".to_i(8)        #=> 294977
"1100101".to_i(10)       #=> 1100101
"1100101".to_i(16)       #=> 17826049

5763
5764
5765
5766
5767
5768
5769
5770
5771
5772
# File 'string.c', line 5763

static VALUE
rb_str_to_i(int argc, VALUE *argv, VALUE str)
{
    int base = 10;

    if (rb_check_arity(argc, 0, 1) && (base = NUM2INT(argv[0])) < 0) {
  rb_raise(rb_eArgError, "invalid radix %d", base);
    }
    return rb_str_to_inum(str, base, FALSE);
}

#to_rObject

Returns the result of interpreting leading characters in str as a rational. Leading whitespace and extraneous characters past the end of a valid number are ignored. Digit sequences can be separated by an underscore. If there is not a valid number at the start of str, zero is returned. This method never raises an exception.

'  2  '.to_r       #=> (2/1)
'300/2'.to_r       #=> (150/1)
'-9.2'.to_r        #=> (-46/5)
'-9.2e2'.to_r      #=> (-920/1)
'1_234_567'.to_r   #=> (1234567/1)
'21 June 09'.to_r  #=> (21/1)
'21/06/09'.to_r    #=> (7/2)
'BWV 1079'.to_r    #=> (0/1)

NOTE: “0.3”.to_r isn't the same as 0.3.to_r. The former is equivalent to “3/10”.to_r, but the latter isn't so.

"0.3".to_r == 3/10r  #=> true
0.3.to_r   == 3/10r  #=> false

See also Kernel#Rational.


2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
# File 'rational.c', line 2533

static VALUE
string_to_r(VALUE self)
{
    VALUE num;

    rb_must_asciicompat(self);

    num = parse_rat(RSTRING_PTR(self), RSTRING_END(self), 0, TRUE);

    if (RB_FLOAT_TYPE_P(num) && !FLOAT_ZERO_P(num))
  rb_raise(rb_eFloatDomainError, "Infinity");
    return num;
}

#to_sString #to_strString

Returns self.

If called on a subclass of String, converts the receiver to a String object.


5806
5807
5808
5809
5810
5811
5812
5813
# File 'string.c', line 5806

static VALUE
rb_str_to_s(VALUE str)
{
    if (rb_obj_class(str) != rb_cString) {
  return str_duplicate(rb_cString, str);
    }
    return str;
}

#to_sString #to_strString

Returns self.

If called on a subclass of String, converts the receiver to a String object.


5806
5807
5808
5809
5810
5811
5812
5813
# File 'string.c', line 5806

static VALUE
rb_str_to_s(VALUE str)
{
    if (rb_obj_class(str) != rb_cString) {
  return str_duplicate(rb_cString, str);
    }
    return str;
}

#internObject #to_symObject

Returns the Symbol corresponding to str, creating the symbol if it did not previously exist. See Symbol#id2name.

"Koala".intern         #=> :Koala
s = 'cat'.to_sym       #=> :cat
s == :cat              #=> true
s = '@cat'.to_sym      #=> :@cat
s == :@cat             #=> true

This can also be used to create symbols that cannot be represented using the :xxx notation.

'cat and dog'.to_sym   #=> :"cat and dog"

718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
# File 'symbol.c', line 718

VALUE
rb_str_intern(VALUE str)
{
#if USE_SYMBOL_GC
    rb_encoding *enc, *ascii;
    int type;
#else
    ID id;
#endif
    VALUE sym = lookup_str_sym(str);

    if (sym) {
  return sym;
    }

#if USE_SYMBOL_GC
    enc = rb_enc_get(str);
    ascii = rb_usascii_encoding();
    if (enc != ascii && sym_check_asciionly(str)) {
  str = rb_str_dup(str);
  rb_enc_associate(str, ascii);
  OBJ_FREEZE(str);
  enc = ascii;
    }
    else {
        str = rb_str_dup(str);
        OBJ_FREEZE(str);
    }
    str = rb_fstring(str);
    type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
    if (type < 0) type = ID_JUNK;
    return dsymbol_alloc(rb_cSymbol, str, enc, type);
#else
    id = intern_str(str, 0);
    return ID2SYM(id);
#endif
}

#tr(from_str, to_str) ⇒ String

Returns a copy of str with the characters in from_str replaced by the corresponding characters in to_str. If to_str is shorter than from_str, it is padded with its last character in order to maintain the correspondence.

"hello".tr('el', 'ip')      #=> "hippo"
"hello".tr('aeiou', '*')    #=> "h*ll*"
"hello".tr('aeiou', 'AA*')  #=> "hAll*"

Both strings may use the c1-c2 notation to denote ranges of characters, and from_str may start with a ^, which denotes all characters except those listed.

"hello".tr('a-y', 'b-z')    #=> "ifmmp"
"hello".tr('^aeiou', '*')   #=> "*e**o"

The backslash character \ can be used to escape ^ or - and is otherwise ignored unless it appears at the end of a range or the end of the from_str or to_str:

"hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
"hello-world".tr("a\\-eo", "*")   #=> "h*ll**w*rld"

"hello\r\nworld".tr("\r", "")   #=> "hello\nworld"
"hello\r\nworld".tr("\\r", "")  #=> "hello\r\nwold"
"hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"

"X['\\b']".tr("X\\", "")   #=> "['b']"
"X['\\b']".tr("X-\\]", "") #=> "'b'"

7364
7365
7366
7367
7368
7369
7370
# File 'string.c', line 7364

static VALUE
rb_str_tr(VALUE str, VALUE src, VALUE repl)
{
    str = rb_str_dup(str);
    tr_trans(str, src, repl, 0);
    return str;
}

#tr!(from_str, to_str) ⇒ String?

Translates str in place, using the same rules as String#tr. Returns str, or nil if no changes were made.


7322
7323
7324
7325
7326
# File 'string.c', line 7322

static VALUE
rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
{
    return tr_trans(str, src, repl, 0);
}

#tr_s(from_str, to_str) ⇒ String

Processes a copy of str as described under String#tr, then removes duplicate characters in regions that were affected by the translation.

"hello".tr_s('l', 'r')     #=> "hero"
"hello".tr_s('el', '*')    #=> "h*o"
"hello".tr_s('el', 'hx')   #=> "hhxo"

7688
7689
7690
7691
7692
7693
7694
# File 'string.c', line 7688

static VALUE
rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
{
    str = rb_str_dup(str);
    tr_trans(str, src, repl, 1);
    return str;
}

#tr_s!(from_str, to_str) ⇒ String?

Performs String#tr_s processing on str in place, returning str, or nil if no changes were made.


7668
7669
7670
7671
7672
# File 'string.c', line 7668

static VALUE
rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
{
    return tr_trans(str, src, repl, 1);
}

#undumpString

Returns an unescaped version of the string. This does the inverse of String#dump.

"\"hello \\n ''\"".undump #=> "hello \n ''"

6347
6348
6349
6350
6351
6352
6353
6354
6355
6356
6357
6358
6359
6360
6361
6362
6363
6364
6365
6366
6367
6368
6369
6370
6371
6372
6373
6374
6375
6376
6377
6378
6379
6380
6381
6382
6383
6384
6385
6386
6387
6388
6389
6390
6391
6392
6393
6394
6395
6396
6397
6398
6399
6400
6401
6402
6403
6404
6405
6406
6407
6408
6409
6410
6411
6412
6413
6414
6415
6416
6417
6418
6419
6420
6421
6422
6423
6424
6425
6426
6427
6428
6429
6430
6431
6432
6433
6434
# File 'string.c', line 6347

static VALUE
str_undump(VALUE str)
{
    const char *s = RSTRING_PTR(str);
    const char *s_end = RSTRING_END(str);
    rb_encoding *enc = rb_enc_get(str);
    VALUE undumped = rb_enc_str_new(s, 0L, enc);
    bool utf8 = false;
    bool binary = false;
    int w;

    rb_must_asciicompat(str);
    if (rb_str_is_ascii_only_p(str) == Qfalse) {
  rb_raise(rb_eRuntimeError, "non-ASCII character detected");
    }
    if (!str_null_check(str, &w)) {
       rb_raise(rb_eRuntimeError, "string contains null byte");
    }
    if (RSTRING_LEN(str) < 2) goto invalid_format;
    if (*s != '"') goto invalid_format;

    /* strip '"' at the start */
    s++;

    for (;;) {
  if (s >= s_end) {
      rb_raise(rb_eRuntimeError, "unterminated dumped string");
  }

  if (*s == '"') {
      /* epilogue */
      s++;
      if (s == s_end) {
    /* ascii compatible dumped string */
    break;
      }
      else {
    static const char force_encoding_suffix[] = ".force_encoding(\""; /* "\")" */
    static const char dup_suffix[] = ".dup";
    const char *encname;
    int encidx;
    ptrdiff_t size;

    /* check separately for strings dumped by older versions */
    size = sizeof(dup_suffix) - 1;
    if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;

    size = sizeof(force_encoding_suffix) - 1;
    if (s_end - s <= size) goto invalid_format;
    if (memcmp(s, force_encoding_suffix, size) != 0) goto invalid_format;
    s += size;

    if (utf8) {
        rb_raise(rb_eRuntimeError, "dumped string contained Unicode escape but used force_encoding");
    }

    encname = s;
    s = memchr(s, '"', s_end-s);
    size = s - encname;
    if (!s) goto invalid_format;
    if (s_end - s != 2) goto invalid_format;
    if (s[0] != '"' || s[1] != ')') goto invalid_format;

    encidx = rb_enc_find_index2(encname, (long)size);
    if (encidx < 0) {
        rb_raise(rb_eRuntimeError, "dumped string has unknown encoding name");
    }
    rb_enc_associate_index(undumped, encidx);
      }
      break;
  }

  if (*s == '\\') {
      s++;
      if (s >= s_end) {
    rb_raise(rb_eRuntimeError, "invalid escape");
      }
      undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
  }
  else {
      rb_str_cat(undumped, s++, 1);
  }
    }

    return undumped;
invalid_format:
    rb_raise(rb_eRuntimeError, "invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
}

#unicode_normalize(form = :nfc) ⇒ Object

Unicode Normalization—Returns a normalized form of str, using Unicode normalizations NFC, NFD, NFKC, or NFKD. The normalization form used is determined by form, which can be any of the four values :nfc, :nfd, :nfkc, or :nfkd. The default is :nfc.

If the string is not in a Unicode Encoding, then an Exception is raised. In this context, 'Unicode Encoding' means any of UTF-8, UTF-16BE/LE, and UTF-32BE/LE, as well as GB18030, UCS_2BE, and UCS_4BE. Anything other than UTF-8 is implemented by converting to UTF-8, which makes it slower than UTF-8.

"a\u0300".unicode_normalize        #=> "\u00E0"
"a\u0300".unicode_normalize(:nfc)  #=> "\u00E0"
"\u00E0".unicode_normalize(:nfd)   #=> "a\u0300"
"\xE0".force_encoding('ISO-8859-1').unicode_normalize(:nfd)
                                   #=> Encoding::CompatibilityError raised

10609
10610
10611
10612
10613
# File 'string.c', line 10609

static VALUE
rb_str_unicode_normalize(int argc, VALUE *argv, VALUE str)
{
    return unicode_normalize_common(argc, argv, str, id_normalize);
}

#unicode_normalize!(form = :nfc) ⇒ Object

Destructive version of String#unicode_normalize, doing Unicode normalization in place.


10622
10623
10624
10625
10626
# File 'string.c', line 10622

static VALUE
rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)
{
    return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
}

#unicode_normalized?(form = :nfc) ⇒ Boolean

Checks whether str is in Unicode normalization form form, which can be any of the four values :nfc, :nfd, :nfkc, or :nfkd. The default is :nfc.

If the string is not in a Unicode Encoding, then an Exception is raised. For details, see String#unicode_normalize.

"a\u0300".unicode_normalized?        #=> false
"a\u0300".unicode_normalized?(:nfd)  #=> true
"\u00E0".unicode_normalized?         #=> true
"\u00E0".unicode_normalized?(:nfd)   #=> false
"\xE0".force_encoding('ISO-8859-1').unicode_normalized?
                                     #=> Encoding::CompatibilityError raised

10645
10646
10647
10648
10649
# File 'string.c', line 10645

static VALUE
rb_str_unicode_normalized_p(int argc, VALUE *argv, VALUE str)
{
    return unicode_normalize_common(argc, argv, str, id_normalized_p);
}

#upcaseString #upcase([options]) ⇒ String

Returns a copy of str with all lowercase letters replaced with their uppercase counterparts.

See String#downcase for meaning of options and use with different encodings.

"hEllO".upcase   #=> "HELLO"

6709
6710
6711
6712
6713
6714
6715
6716
6717
6718
6719
6720
6721
6722
6723
6724
6725
6726
6727
6728
6729
6730
6731
6732
# File 'string.c', line 6709

static VALUE
rb_str_upcase(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
    VALUE ret;

    flags = check_case_options(argc, argv, flags);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        ret = rb_str_new_with_class(str, RSTRING_PTR(str), RSTRING_LEN(str));
        str_enc_copy(ret, str);
        upcase_single(ret);
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY) {
        ret = rb_str_new_with_class(str, 0, RSTRING_LEN(str));
        rb_str_ascii_casemap(str, ret, &flags, enc);
    }
    else {
        ret = rb_str_casemap(str, &flags, enc);
    }

    return ret;
}

#upcase!String? #upcase!([options]) ⇒ String?

Upcases the contents of str, returning nil if no changes were made.

See String#downcase for meaning of options and use with different encodings.


6673
6674
6675
6676
6677
6678
6679
6680
6681
6682
6683
6684
6685
6686
6687
6688
6689
6690
6691
6692
6693
# File 'string.c', line 6673

static VALUE
rb_str_upcase_bang(int argc, VALUE *argv, VALUE str)
{
    rb_encoding *enc;
    OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;

    flags = check_case_options(argc, argv, flags);
    str_modify_keep_cr(str);
    enc = str_true_enc(str);
    if (case_option_single_p(flags, enc, str)) {
        if (upcase_single(str))
            flags |= ONIGENC_CASE_MODIFIED;
    }
    else if (flags&ONIGENC_CASE_ASCII_ONLY)
        rb_str_ascii_casemap(str, str, &flags, enc);
    else
  str_shared_replace(str, rb_str_casemap(str, &flags, enc));

    if (ONIGENC_CASE_MODIFIED&flags) return str;
    return Qnil;
}

#upto(other_str, exclusive = false) {|s| ... } ⇒ String #upto(other_str, exclusive = false) ⇒ Object

Iterates through successive values, starting at str and ending at other_str inclusive, passing each value in turn to the block. The String#succ method is used to generate each value. If optional second argument exclusive is omitted or is false, the last value will be included; otherwise it will be excluded.

If no block is given, an enumerator is returned instead.

"a8".upto("b6") {|s| print s, ' ' }
for s in "a8".."b6"
  print s, ' '
end

produces:

a8 a9 b0 b1 b2 b3 b4 b5 b6
a8 a9 b0 b1 b2 b3 b4 b5 b6

If str and other_str contains only ascii numeric characters, both are recognized as decimal numbers. In addition, the width of string (e.g. leading zeros) is handled appropriately.

"9".upto("11").to_a   #=> ["9", "10", "11"]
"25".upto("5").to_a   #=> []
"07".upto("11").to_a  #=> ["07", "08", "09", "10", "11"]

Overloads:

  • #upto(other_str, exclusive = false) {|s| ... } ⇒ String

    Yields:

    • (s)

4267
4268
4269
4270
4271
4272
4273
4274
4275
# File 'string.c', line 4267

static VALUE
rb_str_upto(int argc, VALUE *argv, VALUE beg)
{
    VALUE end, exclusive;

    rb_scan_args(argc, argv, "11", &end, &exclusive);
    RETURN_ENUMERATOR(beg, argc, argv);
    return rb_str_upto_each(beg, end, RTEST(exclusive), str_upto_i, Qnil);
}

#valid_encoding?Boolean

Returns true for a string which is encoded correctly.

"\xc2\xa1".force_encoding("UTF-8").valid_encoding?  #=> true
"\xc2".force_encoding("UTF-8").valid_encoding?      #=> false
"\x80".force_encoding("UTF-8").valid_encoding?      #=> false

10159
10160
10161
10162
10163
10164
10165
# File 'string.c', line 10159

static VALUE
rb_str_valid_encoding_p(VALUE str)
{
    int cr = rb_enc_str_coderange(str);

    return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
}