Class: MatchData

Inherits:
Object show all
Defined in:
re.c,
re.c

Overview

MatchData is the type of the special variable $~, and is the type of the object returned by Regexp#match and Regexp.last_match. It encapsulates all the results of a pattern match, results normally accessed through the special variables $&, $', $`, $1, $2, and so on.

Instance Method Summary collapse

Instance Method Details

#==(mtch2) ⇒ Boolean

Equality---Two matchdata are equal if their target strings,

patterns, and matched positions are identical.

Returns:

  • (Boolean)

2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
# File 're.c', line 2647

static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;
    if (match1 == match2) return Qtrue;
    if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;
    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}

#[](i) ⇒ String? #[](start, length) ⇒ Array #[](range) ⇒ Array #[](name) ⇒ String?

Match Reference -- MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0] is equivalent to the special variable $&, and returns the entire matched string. mtch[1], mtch[2], and so on return the values of the matched backreferences (portions of the pattern between parentheses).

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m          #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
m[0]       #=> "HX1138"
m[1, 2]    #=> ["H", "X"]
m[1..3]    #=> ["H", "X", "113"]
m[-3, 2]   #=> ["X", "113"]

m = /(?<foo>a+)b/.match("ccaaab")
m          #=> #<MatchData "aaab" foo:"aaa">
m["foo"]   #=> "aaa"
m[:foo]    #=> "aaa"

Overloads:


1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
# File 're.c', line 1711

static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
    VALUE idx, rest;

    match_check(match);
    rb_scan_args(argc, argv, "11", &idx, &rest);

    if (NIL_P(rest)) {
	if (FIXNUM_P(idx)) {
	    if (FIX2INT(idx) >= 0) {
		return rb_reg_nth_match(FIX2INT(idx), match);
	    }
	}
	else {
	    const char *p;
	    int num;

	    switch (TYPE(idx)) {
	      case T_SYMBOL:
		p = rb_id2name(SYM2ID(idx));
		goto name_to_backref;
		break;
	      case T_STRING:
		p = StringValuePtr(idx);

	      name_to_backref:
		num = name_to_backref_number(RMATCH_REGS(match),
					     RMATCH(match)->regexp, p, p + strlen(p));
		return rb_reg_nth_match(num, match);
		break;

	      default:
		break;
	    }
	}
    }

    return rb_ary_aref(argc, argv, match_to_a(match));
}

#begin(n) ⇒ Integer

Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0)       #=> 1
m.begin(2)       #=> 2

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.begin(:foo)  #=> 0
p m.begin(:bar)  #=> 2

Returns:


1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
# File 're.c', line 1134

static VALUE
match_begin(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
	rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
	return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
}

#capturesArray

Returns the array of captures; equivalent to mtch.to_a[1..-1].

f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"
f4    #=> "8"

Returns:


1659
1660
1661
1662
1663
# File 're.c', line 1659

static VALUE
match_captures(VALUE match)
{
    return match_array(match, 1);
}

#end(n) ⇒ Integer

Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0)         #=> 7
m.end(2)         #=> 3

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.end(:foo)    #=> 1
p m.end(:bar)    #=> 3

Returns:


1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
# File 're.c', line 1169

static VALUE
match_end(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
	rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
	return Qnil;

    update_char_offset(match);
    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
}

#==(mtch2) ⇒ Boolean

Equality---Two matchdata are equal if their target strings,

patterns, and matched positions are identical.

Returns:

  • (Boolean)

Returns:

  • (Boolean)

2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
# File 're.c', line 2647

static VALUE
match_equal(VALUE match1, VALUE match2)
{
    const struct re_registers *regs1, *regs2;
    if (match1 == match2) return Qtrue;
    if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
    if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
    if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;
    regs1 = RMATCH_REGS(match1);
    regs2 = RMATCH_REGS(match2);
    if (regs1->num_regs != regs2->num_regs) return Qfalse;
    if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
    if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
    return Qtrue;
}

#hashInteger

Produce a hash based on the target string, regexp and matched positions of this matchdata.

Returns:


2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
# File 're.c', line 2624

static VALUE
match_hash(VALUE match)
{
    const struct re_registers *regs;
    st_index_t hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));

    rb_hash_uint(hashval, reg_hash(RMATCH(match)->regexp));
    regs = RMATCH_REGS(match);
    hashval = rb_hash_uint(hashval, regs->num_regs);
    hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
    hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
    hashval = rb_hash_end(hashval);
    return LONG2FIX(hashval);
}

#initialize_copyObject

:nodoc:


952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
# File 're.c', line 952

static VALUE
match_init_copy(VALUE obj, VALUE orig)
{
    struct rmatch *rm;

    if (!OBJ_INIT_COPY(obj, orig)) return obj;

    RMATCH(obj)->str = RMATCH(orig)->str;
    RMATCH(obj)->regexp = RMATCH(orig)->regexp;

    rm = RMATCH(obj)->rmatch;
    onig_region_copy(&rm->regs, RMATCH_REGS(orig));

    if (!RMATCH(orig)->rmatch->char_offset_updated) {
        rm->char_offset_updated = 0;
    }
    else {
        if (rm->char_offset_num_allocated < rm->regs.num_regs) {
            REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
            rm->char_offset_num_allocated = rm->regs.num_regs;
        }
        MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset,
               struct rmatch_offset, rm->regs.num_regs);
        rm->char_offset_updated = 1;
    }

    return obj;
}

#inspectString

Returns a printable version of mtch.

puts /.$/.match("foo").inspect
#=> #<MatchData "o">

puts /(.)(.)(.)/.match("foo").inspect
#=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">

puts /(.)(.)?(.)/.match("fo").inspect
#=> #<MatchData "fo" 1:"f" 2:nil 3:"o">

puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
#=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">

Returns:


1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
# File 're.c', line 1863

static VALUE
match_inspect(VALUE match)
{
    const char *cname = rb_obj_classname(match);
    VALUE str;
    int i;
    struct re_registers *regs = RMATCH_REGS(match);
    int num_regs = regs->num_regs;
    struct backref_name_tag *names;
    VALUE regexp = RMATCH(match)->regexp;

    if (regexp == 0) {
        return rb_sprintf("#<%s:%p>", cname, (void*)match);
    }

    names = ALLOCA_N(struct backref_name_tag, num_regs);
    MEMZERO(names, struct backref_name_tag, num_regs);

    onig_foreach_name(RREGEXP(regexp)->ptr,
            match_inspect_name_iter, names);

    str = rb_str_buf_new2("#<");
    rb_str_buf_cat2(str, cname);

    for (i = 0; i < num_regs; i++) {
        VALUE v;
        rb_str_buf_cat2(str, " ");
        if (0 < i) {
            if (names[i].name)
                rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
            else {
                rb_str_catf(str, "%d", i);
            }
            rb_str_buf_cat2(str, ":");
        }
        v = rb_reg_nth_match(i, match);
        if (v == Qnil)
            rb_str_buf_cat2(str, "nil");
        else
            rb_str_buf_append(str, rb_str_inspect(v));
    }
    rb_str_buf_cat2(str, ">");

    return str;
}

#lengthInteger #sizeInteger

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:


1032
1033
1034
1035
1036
1037
# File 're.c', line 1032

static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}

#namesArray

Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.

/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
#=> ["foo", "bar", "baz"]

m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
m.names                          #=> ["x", "y"]

Returns:


1013
1014
1015
1016
1017
1018
# File 're.c', line 1013

static VALUE
match_names(VALUE match)
{
    match_check(match);
    return rb_reg_names(RMATCH(match)->regexp);
}

#offset(n) ⇒ Array

Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0)      #=> [1, 7]
m.offset(4)      #=> [6, 7]

m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
p m.offset(:foo) #=> [0, 1]
p m.offset(:bar) #=> [2, 3]

Returns:


1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
# File 're.c', line 1098

static VALUE
match_offset(VALUE match, VALUE n)
{
    int i = match_backref_number(match, n);
    struct re_registers *regs = RMATCH_REGS(match);

    match_check(match);
    if (i < 0 || regs->num_regs <= i)
	rb_raise(rb_eIndexError, "index %d out of matches", i);

    if (BEG(i) < 0)
	return rb_assoc_new(Qnil, Qnil);

    update_char_offset(match);
    return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
			INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
}

#post_matchString

Returns the portion of the original string after the current match. Equivalent to the special variable $'.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match   #=> ": The Movie"

Returns:


1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
# File 're.c', line 1527

VALUE
rb_reg_match_post(VALUE match)
{
    VALUE str;
    long pos;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = RMATCH(match)->str;
    pos = END(0);
    str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}

#pre_matchString

Returns the portion of the original string before the current match. Equivalent to the special variable $`.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match   #=> "T"

Returns:


1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
# File 're.c', line 1500

VALUE
rb_reg_match_pre(VALUE match)
{
    VALUE str;
    struct re_registers *regs;

    if (NIL_P(match)) return Qnil;
    match_check(match);
    regs = RMATCH_REGS(match);
    if (BEG(0) == -1) return Qnil;
    str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    return str;
}

#regexpRegexp

Returns the regexp.

m = /a.*b/.match("abc")
m.regexp #=> /a.*b/

Returns:


992
993
994
995
996
997
# File 're.c', line 992

static VALUE
match_regexp(VALUE match)
{
    match_check(match);
    return RMATCH(match)->regexp;
}

#lengthInteger #sizeInteger

Returns the number of elements in the match array.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length   #=> 5
m.size     #=> 5

Overloads:


1032
1033
1034
1035
1036
1037
# File 're.c', line 1032

static VALUE
match_size(VALUE match)
{
    match_check(match);
    return INT2FIX(RMATCH_REGS(match)->num_regs);
}

#stringString

Returns a frozen copy of the string passed in to match.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string   #=> "THX1138."

Returns:


1817
1818
1819
1820
1821
1822
# File 're.c', line 1817

static VALUE
match_string(VALUE match)
{
    match_check(match);
    return RMATCH(match)->str;	/* str is frozen */
}

#to_aArray

Returns the array of matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a   #=> ["HX1138", "H", "X", "113", "8"]

Because to_a is called when expanding *variable, there's a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).

all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
all   #=> "HX1138"
f1    #=> "H"
f2    #=> "X"
f3    #=> "113"

Returns:


1640
1641
1642
1643
1644
# File 're.c', line 1640

static VALUE
match_to_a(VALUE match)
{
    return match_array(match, 0);
}

#to_sString

Returns the entire matched string.

m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s   #=> "HX1138"

Returns:


1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
# File 're.c', line 1794

static VALUE
match_to_s(VALUE match)
{
    VALUE str = rb_reg_last_match(match);

    match_check(match);
    if (NIL_P(str)) str = rb_str_new(0,0);
    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
    if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
    return str;
}

#values_at([index]) ⇒ Array

Uses each index to access the matching values, returning an array of the corresponding matches.

m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]

Returns:


1773
1774
1775
1776
1777
1778
1779
1780
1781
# File 're.c', line 1773

static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
    struct re_registers *regs;

    match_check(match);
    regs = RMATCH_REGS(match);
    return rb_get_values_at(match, regs->num_regs, argc, argv, match_entry);
}