MatchData is the type of the special variable $~, and is the type of the object returned by Regexp#match and Regexp#last_match. It encapsulates all the results of a pattern match, results normally accessed through the special variables $&, $', $`, $1, $2, and so on. MatchData is also known as MatchingData.
Match Reference—MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0] is equivalent to the special variable $&, and returns the entire matched string. mtch[1], mtch[2], and so on return the values of the matched backreferences (portions of the pattern between parentheses).
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m[0] #=> "HX1138"
m[1, 2] #=> ["H", "X"]
m[1..3] #=> ["H", "X", "113"]
m[-3, 2] #=> ["X", "113"]
/*
 *  call-seq:
 *     mtch[i]               => obj
 *     mtch[start, length]   => array
 *     mtch[range]           => array
 *
 *  Match Reference---<code>MatchData</code> behaves like an array and
 *  supports the usual array indexing forms.  <i>mtch</i>[0] corresponds to
 *  the special variable <code>$&</code> and gives the whole matched string.
 *  <i>mtch</i>[1], <i>mtch</i>[2], and so on give the matched
 *  backreferences (the parts of the pattern enclosed in parentheses).
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m[0]       #=> "HX1138"
 *     m[1, 2]    #=> ["H", "X"]
 *     m[1..3]    #=> ["H", "X", "113"]
 *     m[-3, 2]   #=> ["X", "113"]
 */
static VALUE
match_aref(argc, argv, match)
    int argc;
    VALUE *argv;
    VALUE match;
{
    VALUE idx, rest;
    const char *name = 0;
    int by_name = 0;

    rb_scan_args(argc, argv, "11", &idx, &rest);

    /* A second argument means (start, length): plain array slicing. */
    if (!NIL_P(rest)) {
        return rb_ary_aref(argc, argv, match_to_a(match));
    }

    if (FIXNUM_P(idx)) {
        /* Non-negative fixnums are looked up directly; negative ones
         * fall through to the array path below. */
        if (FIX2INT(idx) >= 0) {
            return rb_reg_nth_match(FIX2INT(idx), match);
        }
    }
    else {
        /* A Symbol or String index is treated as a group name. */
        switch (TYPE(idx)) {
          case T_SYMBOL:
            name = rb_id2name(SYM2ID(idx));
            by_name = 1;
            break;
          case T_STRING:
            name = StringValuePtr(idx);
            by_name = 1;
            break;
          default:
            break;
        }
    }

    if (by_name) {
        /* Translate the name into a group number, then fetch that group. */
        int num = name_to_backref_number(RMATCH(match)->regs,
                                         RMATCH(match)->regexp,
                                         name, name + strlen(name));
        return rb_reg_nth_match(num, match);
    }

    /* Everything else (ranges, negative indexes, ...) is delegated to
     * Array#[] on the array form of the match. */
    return rb_ary_aref(argc, argv, match_to_a(match));
}
Returns the offset of the start of the nth element of the match array in the string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.begin(0) #=> 1
m.begin(2) #=> 2
/*
 *  call-seq:
 *     mtch.begin(n)   => integer
 *
 *  Returns the offset in the string at which the <em>n</em>th element of
 *  the match array starts, or <code>nil</code> when that element has a
 *  negative start offset.  Raises <code>IndexError</code> if <em>n</em>
 *  is negative or not less than the number of elements.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.begin(0)   #=> 1
 *     m.begin(2)   #=> 2
 */
static VALUE
match_begin(match, n)
    VALUE match, n;
{
    int i = NUM2INT(n);

    /* Reject anything outside 0 ... num_regs-1. */
    if (!(0 <= i && i < RMATCH(match)->regs->num_regs)) {
        rb_raise(rb_eIndexError, "index %d out of matches", i);
    }

    /* A negative start offset is reported as nil. */
    return (RMATCH(match)->regs->beg[i] < 0)
        ? Qnil
        : INT2FIX(RMATCH(match)->regs->beg[i]);
}
Returns the array of captures; equivalent to mtch.to_a[1..-1].
f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
f1 #=> "H"
f2 #=> "X"
f3 #=> "113"
f4 #=> "8"
/*
 *  call-seq:
 *     mtch.captures   => array
 *
 *  Returns the captured groups as an array; the same as
 *  <code>mtch.to_a[1..-1]</code>.
 *
 *     f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
 *     f1    #=> "H"
 *     f2    #=> "X"
 *     f3    #=> "113"
 *     f4    #=> "8"
 */
static VALUE
match_captures(match)
    VALUE match;
{
    /* match_to_a passes 0 to the same helper; 1 is used here so the result
     * omits the leading whole-match element (see the call-seq above). */
    VALUE captured = match_array(match, 1);

    return captured;
}
Returns the offset of the character immediately following the end of the nth element of the match array in the string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.end(0) #=> 7
m.end(2) #=> 3
/*
 *  call-seq:
 *     mtch.end(n)   => integer
 *
 *  Returns the offset in the string of the character just past the end of
 *  the <em>n</em>th element of the match array, or <code>nil</code> when
 *  that element has a negative start offset.  Raises
 *  <code>IndexError</code> if <em>n</em> is negative or not less than the
 *  number of elements.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.end(0)   #=> 7
 *     m.end(2)   #=> 3
 */
static VALUE
match_end(match, n)
    VALUE match, n;
{
    int i = NUM2INT(n);

    /* Reject anything outside 0 ... num_regs-1. */
    if (!(0 <= i && i < RMATCH(match)->regs->num_regs)) {
        rb_raise(rb_eIndexError, "index %d out of matches", i);
    }

    /* The start offset (not the end offset) decides whether nil is
     * returned for this element. */
    return (RMATCH(match)->regs->beg[i] < 0)
        ? Qnil
        : INT2FIX(RMATCH(match)->regs->end[i]);
}
Returns a string representing obj. The default to_s prints the object's class and an encoding of the object id. As a special case, the top-level object that is the initial execution context of Ruby programs returns "main."
/*
 *  call-seq:
 *     obj.to_s    => string
 *
 *  Returns a string representation of <i>obj</i>.  The default
 *  <code>to_s</code> shows the class of the object together with an
 *  encoding of its object id.  As a special case, the top-level object
 *  that serves as the initial execution context of Ruby programs
 *  returns ``main.''
 */
VALUE
rb_any_to_s(obj)
    VALUE obj;
{
    char *cname = rb_obj_classname(obj);
    /* class name + 6 bytes of "#<", ":0x", ">" tags + 16 bytes for the
     * hexadecimal value */
    size_t len = strlen(cname) + 6 + 16;
    VALUE str = rb_str_new(0, len);

    snprintf(RSTRING(str)->ptr, len+1, "#<%s:0x%lx>", cname, obj);
    /* Shrink the recorded length to what was actually written. */
    RSTRING(str)->len = strlen(RSTRING(str)->ptr);

    /* The result inherits the taint of the object it describes. */
    if (OBJ_TAINTED(obj)) {
        OBJ_TAINT(str);
    }
    return str;
}
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length #=> 5
m.size #=> 5
/*
 *  call-seq:
 *     mtch.length   => integer
 *     mtch.size     => integer
 *
 *  Returns how many elements the match array holds.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.length   #=> 5
 *     m.size     #=> 5
 */
static VALUE
match_size(match)
    VALUE match;
{
    /* The element count is the register count stored with the match. */
    return INT2FIX(RMATCH(match)->regs->num_regs);
}
Returns a two-element array containing the beginning and ending offsets of the nth match.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.offset(0) #=> [1, 7]
m.offset(4) #=> [6, 7]
/*
 *  call-seq:
 *     mtch.offset(n)   => array
 *
 *  Returns a two-element array holding the start and end offsets of the
 *  <em>n</em>th match, or <code>[nil, nil]</code> when that element has a
 *  negative start offset.  Raises <code>IndexError</code> if <em>n</em>
 *  is negative or not less than the number of elements.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.offset(0)   #=> [1, 7]
 *     m.offset(4)   #=> [6, 7]
 */
static VALUE
match_offset(match, n)
    VALUE match, n;
{
    int i = NUM2INT(n);

    /* Reject anything outside 0 ... num_regs-1. */
    if (!(0 <= i && i < RMATCH(match)->regs->num_regs)) {
        rb_raise(rb_eIndexError, "index %d out of matches", i);
    }

    if (RMATCH(match)->regs->beg[i] >= 0) {
        return rb_assoc_new(INT2FIX(RMATCH(match)->regs->beg[i]),
                            INT2FIX(RMATCH(match)->regs->end[i]));
    }
    else {
        /* Negative start offset: both offsets are reported as nil. */
        return rb_assoc_new(Qnil, Qnil);
    }
}
Returns the portion of the original string after the current match. Equivalent to the special variable $'.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.post_match #=> ": The Movie"
/*
 *  call-seq:
 *     mtch.post_match   => str
 *
 *  Returns the part of the original string that follows the current
 *  match.  Equivalent to the special variable <code>$'</code>.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
 *     m.post_match   #=> ": The Movie"
 */
VALUE
rb_reg_match_post(match)
    VALUE match;
{
    VALUE src, tail;
    long pos;

    /* No match object, or a whole-match start of -1: nothing to return. */
    if (NIL_P(match) || RMATCH(match)->BEG(0) == -1) {
        return Qnil;
    }

    src = RMATCH(match)->str;
    pos = RMATCH(match)->END(0);

    /* Everything from the end of the whole match to the end of the string. */
    tail = rb_str_substr(src, pos, RSTRING(src)->len - pos);
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(tail);
    }
    return tail;
}
Returns the portion of the original string before the current match. Equivalent to the special variable $`.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.pre_match #=> "T"
/*
 *  call-seq:
 *     mtch.pre_match   => str
 *
 *  Returns the part of the original string that precedes the current
 *  match.  Equivalent to the special variable <code>$`</code>.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.pre_match   #=> "T"
 */
VALUE
rb_reg_match_pre(match)
    VALUE match;
{
    VALUE head;

    /* No match object, or a whole-match start of -1: nothing to return. */
    if (NIL_P(match) || RMATCH(match)->BEG(0) == -1) {
        return Qnil;
    }

    /* Everything from the start of the string up to the whole match. */
    head = rb_str_substr(RMATCH(match)->str, 0, RMATCH(match)->BEG(0));
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(head);
    }
    return head;
}
Passes each element of the match array to the block and returns an array of the elements for which the block returns a true value. Passing any argument raises ArgumentError.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a #=> ["HX1138", "H", "X", "113", "8"]
m.select {|s| s =~ /\d/ } #=> ["HX1138", "113", "8"]
/*
* call-seq:
* mtch.select([index]*) => array
*
* Uses each <i>index</i> to access the matching values, returning an
* array of the corresponding matches.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
* m.to_a #=> ["HX1138", "H", "X", "113", "8"]
* m.select(0, 2, -2) #=> ["HX1138", "X", "113"]
*/
static VALUE
match_select(argc, argv, match)
int argc;
VALUE *argv;
VALUE match;
{
if (argc > 0) {
rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc);
}
else {
struct re_registers *regs = RMATCH(match)->regs;
VALUE target = RMATCH(match)->str;
VALUE result = rb_ary_new();
int i;
int taint = OBJ_TAINTED(match);
for (i=0; i<regs->num_regs; i++) {
VALUE str = rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]);
if (taint) OBJ_TAINT(str);
if (RTEST(rb_yield(str))) {
rb_ary_push(result, str);
}
}
return result;
}
}
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.length #=> 5
m.size #=> 5
/*
 *  call-seq:
 *     mtch.length   => integer
 *     mtch.size     => integer
 *
 *  Returns how many elements the match array holds.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.length   #=> 5
 *     m.size     #=> 5
 */
static VALUE
match_size(match)
    VALUE match;
{
    /* The element count is the register count stored with the match. */
    return INT2FIX(RMATCH(match)->regs->num_regs);
}
Returns a frozen copy of the string passed in to match.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.string #=> "THX1138."
/*
 *  call-seq:
 *     mtch.string   => str
 *
 *  Returns a frozen copy of the string that was handed to
 *  <code>match</code>.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.string   #=> "THX1138."
 */
static VALUE
match_string(match)
    VALUE match;
{
    /* The stored string is handed back directly; it is frozen. */
    VALUE subject = RMATCH(match)->str;

    return subject;
}
Returns the array of matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_a #=> ["HX1138", "H", "X", "113", "8"]
Because to_a is called when expanding *variable, there's a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).
all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
all #=> "HX1138"
f1 #=> "H"
f2 #=> "X"
f3 #=> "113"
/*
 *  call-seq:
 *     mtch.to_a   => anArray
 *
 *  Returns the matches as an array.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.to_a   #=> ["HX1138", "H", "X", "113", "8"]
 *
 *  Since <code>to_a</code> is invoked when expanding
 *  <code>*</code><em>variable</em>, it provides a handy assignment
 *  shortcut for pulling out matched fields.  This is slightly slower
 *  than accessing the fields directly, because an intermediate array
 *  is generated.
 *
 *     all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
 *     all   #=> "HX1138"
 *     f1    #=> "H"
 *     f2    #=> "X"
 *     f3    #=> "113"
 */
static VALUE
match_to_a(match)
    VALUE match;
{
    /* 0 is passed so the array begins with the whole-match element, as
     * the example above shows. */
    VALUE all = match_array(match, 0);

    return all;
}
Returns the entire matched string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.")
m.to_s #=> "HX1138"
/*
 *  call-seq:
 *     mtch.to_s   => str
 *
 *  Returns the whole matched string.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.to_s   #=> "HX1138"
 */
static VALUE
match_to_s(match)
    VALUE match;
{
    VALUE str = rb_reg_last_match(match);

    /* Fall back to an empty string when there is no matched text. */
    if (NIL_P(str)) {
        str = rb_str_new(0,0);
    }

    /* The result is tainted if either the match object or the string it
     * was matched against is tainted. */
    if (OBJ_TAINTED(match) || OBJ_TAINTED(RMATCH(match)->str)) {
        OBJ_TAINT(str);
    }
    return str;
}
Uses each index to access the matching values, returning an array of the corresponding matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
m.to_a #=> ["HX1138", "H", "X", "113", "8"]
m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
/*
 *  call-seq:
 *     mtch.values_at([index]*)   => array
 *
 *  Uses each <i>index</i> to access the matching values, returning an
 *  array of the corresponding matches.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
 *     m.to_a                  #=> ["HX1138", "H", "X", "113", "8"]
 *     m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]
 */
static VALUE
match_values_at(argc, argv, match)
    int argc;
    VALUE *argv;
    VALUE match;
{
    /* Delegates to the generic values_at helper, passing the number of
     * elements in the match and match_entry as the per-index callback. */
    return rb_values_at(match, RMATCH(match)->regs->num_regs, argc, argv, match_entry);
}