173
173
static int reg_kcode = DEFAULT_KCODE;
175
static int char_to_option(int c)
181
val = ONIG_OPTION_IGNORECASE;
184
val = ONIG_OPTION_EXTEND;
187
val = ONIG_OPTION_MULTILINE;
196
extern int rb_char_to_option_kcode(int c, int *option, int *kcode)
202
*kcode = ARG_KCODE_NONE;
205
*kcode = ARG_KCODE_EUC;
208
*kcode = ARG_KCODE_SJIS;
211
*kcode = ARG_KCODE_UTF8;
215
*option = char_to_option(c);
219
return ((*kcode == 0 && *option == 0) ? 0 : 1);
222
static int char_to_arg_kcode(int c)
226
if (ISUPPER(c)) c = tolower(c);
228
(void )rb_char_to_option_kcode(c, &option, &kcode);
176
char_to_option(int c)
182
val = ONIG_OPTION_IGNORECASE;
185
val = ONIG_OPTION_EXTEND;
188
val = ONIG_OPTION_MULTILINE;
198
rb_char_to_option_kcode(int c, int *option, int *kcode)
204
*kcode = ARG_KCODE_NONE;
207
*kcode = ARG_KCODE_EUC;
210
*kcode = ARG_KCODE_SJIS;
213
*kcode = ARG_KCODE_UTF8;
217
*option = char_to_option(c);
221
return ((*kcode == 0 && *option == 0) ? 0 : 1);
225
char_to_arg_kcode(int c)
229
if (ISUPPER(c)) c = tolower(c);
231
(void )rb_char_to_option_kcode(c, &option, &kcode);
233
236
kcode_to_arg_value(unsigned int kcode)
235
switch (kcode & KCODE_MASK) {
237
return ARG_KCODE_NONE;
239
return ARG_KCODE_EUC;
241
return ARG_KCODE_SJIS;
243
return ARG_KCODE_UTF8;
238
switch (kcode & KCODE_MASK) {
240
return ARG_KCODE_NONE;
242
return ARG_KCODE_EUC;
244
return ARG_KCODE_SJIS;
246
return ARG_KCODE_UTF8;
250
253
set_re_kcode_by_option(struct RRegexp *re, int options)
252
switch (options & ARG_KCODE_MASK) {
254
FL_UNSET(re, KCODE_MASK);
255
FL_SET(re, KCODE_FIXED);
258
FL_UNSET(re, KCODE_MASK);
259
FL_SET(re, KCODE_EUC);
260
FL_SET(re, KCODE_FIXED);
263
FL_UNSET(re, KCODE_MASK);
264
FL_SET(re, KCODE_SJIS);
265
FL_SET(re, KCODE_FIXED);
268
FL_UNSET(re, KCODE_MASK);
269
FL_SET(re, KCODE_UTF8);
270
FL_SET(re, KCODE_FIXED);
255
switch (options & ARG_KCODE_MASK) {
257
FL_UNSET(re, KCODE_MASK);
258
FL_SET(re, KCODE_FIXED);
261
FL_UNSET(re, KCODE_MASK);
262
FL_SET(re, KCODE_EUC);
263
FL_SET(re, KCODE_FIXED);
266
FL_UNSET(re, KCODE_MASK);
267
FL_SET(re, KCODE_SJIS);
268
FL_SET(re, KCODE_FIXED);
271
FL_UNSET(re, KCODE_MASK);
272
FL_SET(re, KCODE_UTF8);
273
FL_SET(re, KCODE_FIXED);
275
FL_SET(re, reg_kcode);
278
FL_SET(re, reg_kcode);
281
284
re_to_kcode_arg_value(VALUE re)
283
return kcode_to_arg_value(RBASIC(re)->flags);
286
return kcode_to_arg_value(RBASIC(re)->flags);
286
289
static int curr_kcode;
491
494
* comparing the two, as the source of the regular expression itself may
492
495
* differ, as the example shows). <code>Regexp#inspect</code> produces a
493
496
* generally more readable version of <i>rxp</i>.
495
498
* r1 = /ab+c/ix #=> /ab+c/ix
496
499
* s1 = r1.to_s #=> "(?ix-m:ab+c)"
497
500
* r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
624
627
* rxp.options => fixnum
626
629
* Returns the set of bits corresponding to the options used when creating this
627
630
* Regexp (see <code>Regexp::new</code> for details. Note that additional bits
628
631
* may be set in the returned options: these are used internally by the regular
629
632
* expression code. These extra bits are ignored if the options are passed to
630
633
* <code>Regexp::new</code>.
632
635
* Regexp::IGNORECASE #=> 1
633
636
* Regexp::EXTENDED #=> 2
634
637
* Regexp::MULTILINE #=> 4
636
639
* /cat/.options #=> 128
637
640
* /cat/ix.options #=> 131
638
641
* Regexp.new('cat', true).options #=> 129
639
642
* Regexp.new('cat', 0, 's').options #=> 384
642
645
* Regexp.new(r.source, r.options) #=> /cat/ix
1205
1208
* mtch.to_a => anArray
1207
1210
* Returns the array of matches.
1209
1212
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1210
1213
* m.to_a #=> ["HX1138", "H", "X", "113", "8"]
1212
1215
* Because <code>to_a</code> is called when expanding
1213
1216
* <code>*</code><em>variable</em>, there's a useful assignment
1214
1217
* shortcut for extracting matched fields. This is slightly slower than
1215
1218
* accessing the fields directly (as an intermediate array is
1218
1221
* all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
1219
1222
* all #=> "HX1138"
1270
1273
* mtch[start, length] => array
1271
1274
* mtch[range] => array
1272
1275
* mtch[name] => str or nil
1274
1277
* Match Reference---<code>MatchData</code> acts as an array, and may be
1275
1278
* accessed using the normal array indexing techniques. <i>mtch</i>[0] is
1276
1279
* equivalent to the special variable <code>$&</code>, and returns the entire
1277
1280
* matched string. <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
1278
1281
* of the matched backreferences (portions of the pattern between parentheses).
1280
1283
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1281
1284
* m[0] #=> "HX1138"
1282
1285
* m[1, 2] #=> ["H", "X"]
1340
1343
if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4)
1341
1344
rb_raise(rb_eSecurityError, "Insecure: can't modify regexp");
1342
1345
* mtch.select([index]*) => array
1344
1347
* Uses each <i>index</i> to access the matching values, returning an array of
1345
1348
* the corresponding matches.
1347
1350
* m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
1348
1351
* m.to_a #=> ["HX1138", "H", "X", "113", "8"]
1349
1352
* m.select(0, 2, -2) #=> ["HX1138", "X", "113"]
1361
1364
* mtch.select([index]*) => array
1363
1366
* Uses each <i>index</i> to access the matching values, returning an
1364
1367
* array of the corresponding matches.
1366
1369
* m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
1367
1370
* m.to_a #=> ["HX1138", "H", "X", "113", "8"]
1368
1371
* m.select(0, 2, -2) #=> ["HX1138", "X", "113"]
1561
1564
* rxp == other_rxp => true or false
1562
1565
* rxp.eql?(other_rxp) => true or false
1564
1567
* Equality---Two regexps are equal if their patterns are identical, they have
1565
1568
* the same character set code, and their <code>casefold?</code> values are the
1568
1571
* /abc/ == /abc/x #=> false
1569
1572
* /abc/ == /abc/i #=> false
1570
1573
* /abc/u == /abc/n #=> false
1711
1714
* rxp.match(str) => matchdata or nil
1712
1715
* rxp.match(str,pos) => matchdata or nil
1714
1717
* Returns a <code>MatchData</code> object describing the match, or
1715
1718
* <code>nil</code> if there was no match. This is equivalent to retrieving the
1716
1719
* value of the special variable <code>$~</code> following a normal match.
1717
1720
* If the second parameter is present, it specifies the position in the string
1718
1721
* to begin the search.
1720
1723
* /(.)(.)(.)/.match("abc")[2] #=> "b"
1721
1724
* /(.)(.)/.match("abc", 1)[2] #=> "c"
1757
1760
* Regexp.new(regexp) => regexp
1758
1761
* Regexp.compile(string [, options [, lang]]) => regexp
1759
1762
* Regexp.compile(regexp) => regexp
1761
1764
* Constructs a new regular expression from <i>pattern</i>, which can be either
1762
1765
* a <code>String</code> or a <code>Regexp</code> (in which case that regexp's
1763
1766
* options are propagated, and new options may not be specified (a change as of
1768
1771
* <code>nil</code>, the regexp will be case insensitive. The <i>lang</i>
1769
1772
* parameter enables multibyte support for the regexp: `n', `N' = none, `e',
1770
1773
* `E' = EUC, `s', `S' = SJIS, `u', `U' = UTF-8.
1772
1775
* r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
1773
1776
* r2 = Regexp.new('cat', true) #=> /cat/i
1774
1777
* r3 = Regexp.new('dog', Regexp::EXTENDED) #=> /dog/x
1905
1908
* Regexp.escape(str) => a_str
1906
1909
* Regexp.quote(str) => a_str
1908
1911
* Escapes any characters that would have special meaning in a regular
1909
1912
* expression. Returns a new escaped string, or self if no characters are
1910
1913
* escaped. For any string,
1911
1914
* <code>Regexp.escape(<i>str</i>)=~<i>str</i></code> will be true.
1913
1916
* Regexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
1967
1970
* Regexp.union([pattern]*) => new_str
1969
1972
* Return a <code>Regexp</code> object that is the union of the given
1970
1973
* <em>pattern</em>s, i.e., will match any of its parts. The <em>pattern</em>s
1971
1974
* can be Regexp objects, in which case their options will be preserved, or
1972
1975
* Strings. If no arguments are given, returns <code>/(?!)/</code>.
1974
1977
* Regexp.union #=> /(?!)/
1975
1978
* Regexp.union("penzance") #=> /penzance/
1976
1979
* Regexp.union("skiing", "sledding") #=> /skiing|sledding/
2280
2283
* Regexp.last_match => matchdata
2281
2284
* Regexp.last_match(fixnum) => str
2283
2286
* The first form returns the <code>MatchData</code> object generated by the
2284
2287
* last successful pattern match. Equivalent to reading the global variable
2285
2288
* <code>$~</code>. The second form returns the nth field in this
2286
2289
* <code>MatchData</code> object.
2288
2291
* /c(.)t/ =~ 'cat' #=> 0
2289
2292
* Regexp.last_match #=> #<MatchData:0x401b3d30>
2290
2293
* Regexp.last_match(0) #=> "cat"