1
# Commands covered: regexp, regsub
3
# This file contains a collection of tests for one or more of the Tcl
4
# built-in commands. Sourcing this file into Tcl runs the tests and
5
# generates output for errors. No output means no errors were found.
7
# Copyright (c) 1991-1993 The Regents of the University of California.
8
# Copyright (c) 1998 Sun Microsystems, Inc.
9
# Copyright (c) 1998-1999 by Scriptics Corporation.
11
# See the file "license.terms" for information on usage and redistribution
12
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
16
if {[lsearch [namespace children] ::tcltest] == -1} {
17
package require tcltest 2
18
namespace import -force ::tcltest::*
21
# Procedure to evaluate a script within a proc, to test compilation
24
proc evalInProc { script } {
25
proc testProc {} $script
31
#return [list $status $result]
35
test regexpComp-1.1 {basic regexp operation} {
40
test regexpComp-1.2 {basic regexp operation} {
45
test regexpComp-1.3 {basic regexp operation} {
50
test regexpComp-1.4 {basic regexp operation} {
52
regexp -- -gorp abc-gorpxxx
55
test regexpComp-1.5 {basic regexp operation} {
57
regexp {^([^ ]*)[ ]*([^ ]*)} "" a
60
test regexpComp-1.6 {basic regexp operation} {
61
list [catch {regexp {} abc} msg] $msg
63
test regexpComp-1.7 {regexp utf compliance} {
64
# if not UTF-8 aware, result is "0 1"
67
regexp "\u4e4eb q" "a\u4e4eb qw\u5e4e\x4e wq" bar
68
list [string compare $foo $bar] [regexp 4 $bar]
72
test regexpComp-2.1 {getting substrings back from regexp} {
75
list [regexp ab*c abbbbc foo] $foo
78
test regexpComp-2.2 {getting substrings back from regexp} {
82
list [regexp a(b*)c abbbbc foo f2] $foo $f2
85
test regexpComp-2.3 {getting substrings back from regexp} {
89
list [regexp a(b*)(c) abbbbc foo f2] $foo $f2
92
test regexpComp-2.4 {getting substrings back from regexp} {
97
list [regexp a(b*)(c) abbbbc foo f2 f3] $foo $f2 $f3
100
# Eleven parenthesized subexpressions in one pattern: foo receives the whole
# match and f1..fb each receive the run of repeated characters captured by
# the corresponding group.  All variables are cleared first so stale values
# cannot satisfy the expected result.
test regexpComp-2.5 {getting substrings back from regexp} {
102
set foo {}; set f1 {}; set f2 {}; set f3 {}; set f4 {}; set f5 {};
103
set f6 {}; set f7 {}; set f8 {}; set f9 {}; set fa {}; set fb {};
104
list [regexp (1*)(2*)(3*)(4*)(5*)(6*)(7*)(8*)(9*)(a*)(b*) \
105
12223345556789999aabbb \
106
foo f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb] $foo $f1 $f2 $f3 $f4 $f5 \
107
$f6 $f7 $f8 $f9 $fa $fb
109
} {1 12223345556789999aabbb 1 222 33 4 555 6 7 8 9999 aa bbb}
110
test regexpComp-2.6 {getting substrings back from regexp} {
112
set foo 2; set f2 2; set f3 2; set f4 2
113
list [regexp (a)(b)? xay foo f2 f3 f4] $foo $f2 $f3 $f4
116
test regexpComp-2.7 {getting substrings back from regexp} {
118
set foo 1; set f2 1; set f3 1; set f4 1
119
list [regexp (a)(b)?(c) xacy foo f2 f3 f4] $foo $f2 $f3 $f4
122
test regexpComp-2.8 {getting substrings back from regexp} {
125
list [regexp {^a*b} aaaab match] $match
129
test regexpComp-3.1 {-indices option to regexp} {
132
list [regexp -indices ab*c abbbbc foo] $foo
135
test regexpComp-3.2 {-indices option to regexp} {
139
list [regexp -indices a(b*)c abbbbc foo f2] $foo $f2
142
test regexpComp-3.3 {-indices option to regexp} {
146
list [regexp -indices a(b*)(c) abbbbc foo f2] $foo $f2
149
# With -indices each match variable receives a first/last character index
# pair instead of the matched text: the whole match spans 0-5, the run of
# b's spans 1-4, and the single c is at 5.
test regexpComp-3.4 {-indices option to regexp} {
154
list [regexp -indices a(b*)(c) abbbbc foo f2 f3] $foo $f2 $f3
156
} {1 {0 5} {1 4} {5 5}}
157
test regexpComp-3.5 {-indices option to regexp} {
159
set foo {}; set f1 {}; set f2 {}; set f3 {}; set f4 {}; set f5 {};
160
set f6 {}; set f7 {}; set f8 {}; set f9 {}
161
list [regexp -indices (1*)(2*)(3*)(4*)(5*)(6*)(7*)(8*)(9*) \
163
foo f1 f2 f3 f4 f5 f6 f7 f8 f9] $foo $f1 $f2 $f3 $f4 $f5 \
166
} {1 {0 16} {0 0} {1 3} {4 5} {6 6} {7 9} {10 10} {11 11} {12 12} {13 16}}
167
# -indices with an optional group that does not participate in the match:
# the unmatched group (f3) and the surplus variable (f4) are both expected
# to be reported as "-1 -1".  The variables are preset to 2 so that the
# test notices if they are left untouched.
test regexpComp-3.6 {getting substrings back from regexp} {
169
set foo 2; set f2 2; set f3 2; set f4 2
170
list [regexp -indices (a)(b)? xay foo f2 f3 f4] $foo $f2 $f3 $f4
172
} {1 {1 1} {1 1} {-1 -1} {-1 -1}}
173
# Same idea with a group after the optional one: the skipped (b)? group is
# expected as "-1 -1" while the following (c) group still gets its real
# index pair.
test regexpComp-3.7 {getting substrings back from regexp} {
175
set foo 1; set f2 1; set f3 1; set f4 1
176
list [regexp -indices (a)(b)?(c) xacy foo f2 f3 f4] $foo $f2 $f3 $f4
178
} {1 {1 2} {1 1} {-1 -1} {2 2}}
180
test regexpComp-4.1 {-nocase option to regexp} {
182
regexp -nocase foo abcFOo
185
# -nocase affects matching only: the lower-case pattern matches mixed-case
# input, and the match variables are expected to hold the text exactly as
# it appears in the input string (original case preserved).
test regexpComp-4.2 {-nocase option to regexp} {
190
list [regexp -nocase {a(b*)([xy]*)z} aBbbxYXxxZ22 f1 f2 f3] $f1 $f2 $f3
192
} {1 aBbbxYXxxZ Bbb xYXxx}
193
test regexpComp-4.3 {-nocase option to regexp} {
195
regexp -nocase FOo abcFOo
198
set ::x abcdefghijklmnopqrstuvwxyz1234567890
199
set ::x $x$x$x$x$x$x$x$x$x$x$x$x
200
test regexpComp-4.4 {case conversion in regexp} {
202
list [regexp -nocase $::x $::x foo] $foo
207
test regexpComp-5.1 {exercise cache of compiled expressions} {
217
test regexpComp-5.2 {exercise cache of compiled expressions} {
227
test regexpComp-5.3 {exercise cache of compiled expressions} {
237
test regexpComp-5.4 {exercise cache of compiled expressions} {
247
test regexpComp-5.5 {exercise cache of compiled expressions} {
258
# Too few arguments: a pattern with no string to match against must fail
# with the standard "wrong # args" usage message.
test regexpComp-6.1 {regexp errors} {
260
list [catch {regexp a} msg] $msg
262
} {1 {wrong # args: should be "regexp ?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?"}}
263
# A leading switch does not count toward the required arguments: the same
# usage error is expected when only a switch and a pattern are supplied.
test regexpComp-6.2 {regexp errors} {
265
list [catch {regexp -nocase a} msg] $msg
267
} {1 {wrong # args: should be "regexp ?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?"}}
268
# An unrecognized switch must be rejected with a message listing every
# valid regexp switch.
test regexpComp-6.3 {regexp errors} {
270
list [catch {regexp -gorp a} msg] $msg
272
} {1 {bad switch "-gorp": must be -all, -about, -indices, -inline, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}}
273
# A pattern with an unclosed parenthesis must fail at regular-expression
# compile time.
test regexpComp-6.4 {regexp errors} {
275
list [catch {regexp a( b} msg] $msg
277
} {1 {couldn't compile regular expression pattern: parentheses () not balanced}}
278
# NOTE(review): the visible command and expected result are identical to
# regexpComp-6.4 -- confirm whether a different variant was intended here.
test regexpComp-6.5 {regexp errors} {
280
list [catch {regexp a( b} msg] $msg
282
} {1 {couldn't compile regular expression pattern: parentheses () not balanced}}
283
test regexpComp-6.6 {regexp errors} {
285
list [catch {regexp a a f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1} msg] $msg
288
test regexpComp-6.7 {regexp errors} {
290
list [catch {regexp (x)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.) xyzzy} msg] $msg
293
# The match succeeds but storing the result fails: the match variable is
# named as an array element f1(f2) and the expected error is that it cannot
# be set.
# NOTE(review): whatever makes f1 unusable as an array (presumably an
# earlier scalar assignment) is not visible in this excerpt -- confirm.
test regexpComp-6.8 {regexp errors} {
297
list [catch {regexp abc abc f1(f2)} msg] $msg
299
} {1 {couldn't set variable "f1(f2)"}}
300
# -start requires an index value; a non-numeric argument must be rejected
# with the "bad index" message before any matching is attempted.
test regexpComp-6.9 {regexp errors, -start bad int check} {
302
list [catch {regexp -start bogus {^$} {}} msg] $msg
304
} {1 {bad index "bogus": must be integer?[+-]integer? or end?[+-]integer?}}
306
# Basic substitution: an "&" in the substitution spec stands for the matched
# text.  Without -all only the first match (aaa) is replaced, the later "aa"
# is left alone, and regsub returns 1 as the replacement count.
test regexpComp-7.1 {basic regsub operation} {
308
list [regsub aa+ xaxaaaxaa 111&222 foo] $foo
310
} {1 xax111aaa222xaa}
311
test regexpComp-7.2 {basic regsub operation} {
313
list [regsub aa+ aaaxaa &111 foo] $foo
316
test regexpComp-7.3 {basic regsub operation} {
318
list [regsub aa+ xaxaaa 111& foo] $foo
321
test regexpComp-7.4 {basic regsub operation} {
323
list [regsub aa+ aaa 11&2&333 foo] $foo
326
# "&" may appear more than once in the substitution spec, including at the
# very beginning; each occurrence expands to the matched text (aaa).
test regexpComp-7.5 {basic regsub operation} {
328
list [regsub aa+ xaxaaaxaa &2&333 foo] $foo
330
} {1 xaxaaa2aaa333xaa}
331
# Same as above with "&" as the final character of the substitution spec.
test regexpComp-7.6 {basic regsub operation} {
333
list [regsub aa+ xaxaaaxaa 1&22& foo] $foo
335
} {1 xax1aaa22aaaxaa}
336
test regexpComp-7.7 {basic regsub operation} {
338
list [regsub a(a+) xaxaaaxaa {1\122\1} foo] $foo
341
# Backslash handling in the substitution spec.  Here "\\" yields one literal
# backslash and each "\1" yields the first captured group (aa), so the
# replacement is: 1, backslash, aa, 22, aa.
test regexpComp-7.8 {basic regsub operation} {
343
list [regsub a(a+) xaxaaaxaa {1\\\122\1} foo] $foo
345
} "1 {xax1\\aa22aaxaa}"
346
# Here the "\\" consumes both backslashes, so the following "122" is plain
# text rather than a group reference; only the trailing "\1" expands to aa.
test regexpComp-7.9 {basic regsub operation} {
348
list [regsub a(a+) xaxaaaxaa {1\\122\1} foo] $foo
350
} "1 {xax1\\122aaxaa}"
351
# An "&" that follows an escaped backslash is still a live whole-match
# reference: the result is 1, backslash, aaa (whole match), aa (group 1).
test regexpComp-7.10 {basic regsub operation} {
353
list [regsub a(a+) xaxaaaxaa {1\\&\1} foo] $foo
355
} "1 {xax1\\aaaaaxaa}"
356
test regexpComp-7.11 {basic regsub operation} {
358
list [regsub a(a+) xaxaaaxaa {1\&\1} foo] $foo
361
# Many references in one substitution spec: four copies of group 1 (aa)
# followed by two copies of the whole match (aaa), giving fourteen a's in
# place of the original three.
test regexpComp-7.12 {basic regsub operation} {
363
list [regsub a(a+) xaxaaaxaa {\1\1\1\1&&} foo] $foo
365
} {1 xaxaaaaaaaaaaaaaaxaa}
366
test regexpComp-7.13 {basic regsub operation} {
369
list [regsub abc xyz 111 foo] $foo
372
test regexpComp-7.14 {basic regsub operation} {
375
list [regsub ^ xyz "111 " foo] $foo
378
test regexpComp-7.15 {basic regsub operation} {
381
list [regsub -- -foo abc-foodef "111 " foo] $foo
384
test regexpComp-7.16 {basic regsub operation} {
387
list [regsub x "" y foo] $foo
390
test regexpComp-7.17 {regsub utf compliance} {
392
# if not UTF-8 aware, result is "0 1"
393
set foo "xyz555ijka\u4e4ebpqr"
394
regsub a\u4e4eb xyza\u4e4ebijka\u4e4ebpqr 555 bar
395
list [string compare $foo $bar] [regexp 4 $bar]
399
test regexpComp-8.1 {case conversion in regsub} {
401
list [regsub -nocase a(a+) xaAAaAAay & foo] $foo
404
test regexpComp-8.2 {case conversion in regsub} {
406
list [regsub -nocase a(a+) xaAAaAAay & foo] $foo
409
test regexpComp-8.3 {case conversion in regsub} {
412
list [regsub a(a+) xaAAaAAay & foo] $foo
415
test regexpComp-8.4 {case conversion in regsub} {
418
list [regsub -nocase a CaDE b foo] $foo
421
test regexpComp-8.5 {case conversion in regsub} {
424
list [regsub -nocase XYZ CxYzD b foo] $foo
427
test regexpComp-8.6 {case conversion in regsub} {
429
set x abcdefghijklmnopqrstuvwxyz1234567890
430
set x $x$x$x$x$x$x$x$x$x$x$x$x
432
list [regsub -nocase $x $x b foo] $foo
436
# -all replaces every non-overlapping match; regsub returns the number of
# replacements made (four runs of x here), each wrapped in "|".
test regexpComp-9.1 {-all option to regsub} {
439
list [regsub -all x+ axxxbxxcxdx |&| foo] $foo
441
} {4 a|xxx|b|xx|c|x|d|x|}
442
# -all combined with -nocase: mixed-case runs are matched and the text
# substituted for "&" keeps the case found in the input.
test regexpComp-9.2 {-all option to regsub} {
445
list [regsub -nocase -all x+ aXxXbxxcXdx |&| foo] $foo
447
} {4 a|XxX|b|xx|c|X|d|x|}
448
test regexpComp-9.3 {-all option to regsub} {
451
list [regsub x+ axxxbxxcxdx |&| foo] $foo
454
test regexpComp-9.4 {-all option to regsub} {
457
list [regsub -all bc axxxbxxcxdx |&| foo] $foo
460
test regexpComp-9.5 {-all option to regsub} {
463
list [regsub -all node "node node more" yy foo] $foo
466
test regexpComp-9.6 {-all option to regsub} {
469
list [regsub -all ^ xxx 123 foo] $foo
473
test regexpComp-10.1 {expanded syntax in regsub} {
476
list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo
479
test regexpComp-10.2 {newline sensitivity in regsub} {
482
list [regsub -line {^a.*b$} "dabc\naxyb\n" 123 foo] $foo
485
# With -line the anchored pattern is expected to match a single interior
# line of a multi-line string: only "axyb" is replaced, while the lines
# before and after it are left intact.
test regexpComp-10.3 {newline sensitivity in regsub} {
488
list [regsub -line {^a.*b$} "dabc\naxyb\nxb" 123 foo] $foo
490
} "1 {dabc\n123\nxb}"
491
test regexpComp-10.4 {partial newline sensitivity in regsub} {
494
list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo
497
test regexpComp-10.5 {inverse partial newline sensitivity in regsub} {
500
list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo
504
# Argument-count checks for regsub.  Each case below must fail with the same
# usage message.
#
# Too few arguments: pattern and string but no substitution spec.
test regexpComp-11.1 {regsub errors} {
506
list [catch {regsub a b} msg] $msg
508
} {1 {wrong # args: should be "regsub ?switches? exp string subSpec ?varName?"}}
509
# A leading switch does not count toward the required arguments.
test regexpComp-11.2 {regsub errors} {
511
list [catch {regsub -nocase a b} msg] $msg
513
} {1 {wrong # args: should be "regsub ?switches? exp string subSpec ?varName?"}}
514
# Nor do several leading switches.
test regexpComp-11.3 {regsub errors} {
516
list [catch {regsub -nocase -all a b} msg] $msg
518
} {1 {wrong # args: should be "regsub ?switches? exp string subSpec ?varName?"}}
519
# Too many arguments: extra words after the optional variable name.
test regexpComp-11.4 {regsub errors} {
521
list [catch {regsub a b c d e f} msg] $msg
523
} {1 {wrong # args: should be "regsub ?switches? exp string subSpec ?varName?"}}
524
# An unrecognized switch must be rejected with a message listing every
# valid regsub switch.
test regexpComp-11.5 {regsub errors} {
526
list [catch {regsub -gorp a b c} msg] $msg
528
} {1 {bad switch "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}}
529
# A pattern with an unclosed parenthesis must fail at regular-expression
# compile time, even when a valid switch precedes it.
test regexpComp-11.6 {regsub errors} {
531
list [catch {regsub -nocase a( b c d} msg] $msg
533
} {1 {couldn't compile regular expression pattern: parentheses () not balanced}}
534
# The substitution itself succeeds but the result cannot be stored in the
# array-element variable f1(f2).
# NOTE(review): whatever makes f1 unusable as an array (presumably an
# earlier scalar assignment) is not visible in this excerpt -- confirm.
test regexpComp-11.7 {regsub errors} {
538
list [catch {regsub -nocase aaa aaa xxx f1(f2)} msg] $msg
540
} {1 {couldn't set variable "f1(f2)"}}
541
# -start requires an index value; a non-numeric argument must be rejected
# with the "bad index" message.
test regexpComp-11.8 {regsub errors, -start bad int check} {
543
list [catch {regsub -start bogus pattern string rep var} msg] $msg
545
} {1 {bad index "bogus": must be integer?[+-]integer? or end?[+-]integer?}}
547
# This test crashes on the Mac unless you increase the Stack Space to about 1
548
# Meg. This is probably bigger than most users want...
549
# 8.2.3 regexp reduced stack space requirements, but this should be
551
# Matches the 26-letter alphabet against 26 single-character capture groups
# and stores the whole match plus every group in its own variable (all,
# a..z).  macCrash is passed in the constraints position of the test
# command.
test regexpComp-12.1 {Tcl_RegExpExec: large number of subexpressions} {macCrash} {
553
list [regexp (.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.) abcdefghijklmnopqrstuvwxyz all a b c d e f g h i j k l m n o p q r s t u v w x y z] $all $a $b $c $d $e $f $g $h $i $j $k $l $m $n $o $p $q $r $s $t $u $v $w $x $y $z
555
} {1 abcdefghijklmnopqrstuvwxyz a b c d e f g h i j k l m n o p q r s t u v w x y z}
557
test regexpComp-13.1 {regsub of a very large string} {
558
# This test is designed to stress the memory subsystem in order
559
# to catch Bug #933. It only fails if the Tcl memory allocator
562
set line {BEGIN_TABLE ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; END_TABLE}
563
set filedata [string repeat $line 200]
564
for {set i 1} {$i<10} {incr i} {
565
regsub -all "BEGIN_TABLE " $filedata "" newfiledata
570
test regexpComp-14.1 {CompileRegexp: regexp cache} {
582
test regexpComp-14.2 {CompileRegexp: regexp cache, different flags} {
591
regexp -nocase $x bbba
595
testConstraint exec [llength [info commands exec]]
596
test regexpComp-14.3 {CompileRegexp: regexp cache, empty regexp and empty cache} -constraints {
599
set junk [makeFile {puts [regexp {} foo]} junk.tcl]
601
exec [interpreter] $junk
606
test regexpComp-15.1 {regexp -start} {
608
list [regexp -start -10 {\d} 1abc2de3 x] $x
610
test regexpComp-15.2 {regexp -start} {
612
list [regexp -start 2 {\d} 1abc2de3 x] $x
614
test regexpComp-15.3 {regexp -start} {
616
list [regexp -start 4 {\d} 1abc2de3 x] $x
618
test regexpComp-15.4 {regexp -start} {
620
list [regexp -start 5 {\d} 1abc2de3 x] $x
622
test regexpComp-15.5 {regexp -start, over end of string} {
624
list [regexp -start [string length 1abc2de3] {\d} 1abc2de3 x] [info exists x]
626
test regexpComp-15.6 {regexp -start, loss of ^$ behavior} {
627
list [regexp -start 2 {^$} {}]
630
test regexpComp-16.1 {regsub -start} {
632
list [regsub -all -start 2 {\d} a1b2c3d4e5 {/&} x] $x
634
test regexpComp-16.2 {regsub -start} {
636
list [regsub -all -start -25 {z} hello {/&} x] $x
638
test regexpComp-16.3 {regsub -start} {
640
list [regsub -all -start 3 {z} hello {/&} x] $x
642
# Interaction of the \A (start-of-string) constraint with -all and -start.
# Per the expected result, \A matches at the beginning of every successive
# search, so each word character from the -start offset onward is rewritten:
# all five with -start 0, and the last three with -start 2.
# NOTE(review): the initialization of "out" is not visible in this excerpt;
# presumably it is reset to an empty list before the first lappend -- confirm.
test regexpComp-16.4 {regsub -start, \A behavior} {
644
lappend out [regsub -start 0 -all {\A(\w)} {abcde} {/\1} x] $x
645
lappend out [regsub -start 2 -all {\A(\w)} {abcde} {/\1} x] $x
646
} {5 /a/b/c/d/e 3 ab/c/d/e}
648
test regexpComp-17.1 {regexp -inline} {
649
regexp -inline b ababa
651
test regexpComp-17.2 {regexp -inline} {
652
regexp -inline (b) ababa
654
test regexpComp-17.3 {regexp -inline -indices} {
655
regexp -inline -indices (b) ababa
657
test regexpComp-17.4 {regexp -inline} {
658
regexp -inline {\w(\d+)\w} " hello 23 there456def "
660
test regexpComp-17.5 {regexp -inline no matches} {
661
regexp -inline {\w(\d+)\w} ""
663
test regexpComp-17.6 {regexp -inline no matches} {
664
regexp -inline hello goodbye
666
# -inline returns the match data as the command result, so supplying a match
# variable as well is an error.
test regexpComp-17.7 {regexp -inline, no matchvars allowed} {
667
list [catch {regexp -inline b abc match} msg] $msg
668
} {1 {regexp match variables not allowed when using -inline}}
670
test regexpComp-18.1 {regexp -all} {
673
test regexpComp-18.2 {regexp -all} {
674
regexp -all b abababbabaaaaaaaaaab
676
test regexpComp-18.3 {regexp -all -inline} {
677
regexp -all -inline b abababbabaaaaaaaaaab
679
test regexpComp-18.4 {regexp -all -inline} {
680
regexp -all -inline {\w(\w)} abcdefg
682
test regexpComp-18.5 {regexp -all -inline} {
683
regexp -all -inline {\w(\w)$} abcdefg
685
test regexpComp-18.6 {regexp -all -inline} {
686
regexp -all -inline {\d+} 10:20:30:40
688
# The ban on match variables with -inline also applies when -all is given.
test regexpComp-18.7 {regexp -all -inline} {
689
list [catch {regexp -all -inline b abc match} msg] $msg
690
} {1 {regexp match variables not allowed when using -inline}}
691
test regexpComp-18.8 {regexp -all} {
692
# This should not cause an infinite loop
693
regexp -all -inline {a*} a
695
test regexpComp-18.9 {regexp -all} {
696
# Yes, the expected result is {a {}}. Here's why:
697
# Start at index 0; a* matches the "a" there then stops.
698
# Go to index 1; a* matches the empty string (or {}) there then stops. Recall
699
# that a* matches zero or more "a"'s; thus it matches, with zero length, at
700
# the position of the "b", since zero "a"'s are found there.
701
# Go to index 2; this is past the end of the string, so stop.
702
regexp -all -inline {a*} ab
704
test regexpComp-18.10 {regexp -all} {
705
# Yes, the expected result is {a {} a}. Here's why:
706
# Start at index 0; a* matches the "a" there then stops.
707
# Go to index 1; a* matches the empty string (or {}) there then stops. Recall
708
# that a* matches zero or more "a"'s; thus it matches, with zero length, at
709
# the position of the "b", since zero "a"'s are found there.
710
# Go to index 2; a* matches the "a" there then stops.
711
# Go to index 3; this is past the end of the string, so stop.
712
regexp -all -inline {a*} aba
714
test regexpComp-18.11 {regexp -all} {
716
regexp -all -inline {^a} aaaa
719
# -all -inline -indices with alternation: two matches are found (abcdh and
# aefgh) and each contributes four index pairs -- the whole match, the
# alternation group, (c), and (f).  The capture group belonging to the
# alternative that was not taken is expected as "-1 -1".
test regexpComp-18.12 {regexp -all -inline -indices} {
721
regexp -all -inline -indices a(b(c)d|e(f)g)h abcdhaefgh
723
} {{0 4} {1 3} {2 2} {-1 -1} {5 9} {6 8} {-1 -1} {7 7}}
725
# The substitution spec is written in double quotes, so the Tcl parser turns
# each \0 into a NUL character before regsub sees it.  The expected result
# therefore contains embedded NULs: three 3-character replacements plus
# "hel" and "lo" give a length of 14.
test regexpComp-19.1 {regsub null replacement} {
727
regsub -all {@} {@hel@lo@} "\0a\0" result
728
list $result [string length $result]
730
} "\0a\0hel\0a\0lo\0a\0 14"
732
test regexpComp-20.1 {regsub shared object shimmering} {
735
set a abcdefghijklmnopqurstuvwxyz
737
set c abcdefghijklmnopqurstuvwxyz0123456789
739
list $d [string length $d] [string bytelength $d]
741
} [list abcdefghijklmnopqurstuvwxyz0123456789 37 37]
742
test regexpComp-20.2 {regsub shared object shimmering with -about} {
744
eval regexp -about abc
748
test regexpComp-21.1 {regexp command compiling tests} {
753
test regexpComp-21.2 {regexp command compiling tests} {
755
regexp {^foo$} dogfood
758
test regexpComp-21.3 {regexp command compiling tests} {
764
test regexpComp-21.4 {regexp command compiling tests} {
769
test regexpComp-21.5 {regexp command compiling tests} {
771
regexp -nocase FOO dogfod
774
test regexpComp-21.6 {regexp command compiling tests} {
776
regexp -n foo dogfoOd
779
test regexpComp-21.7 {regexp command compiling tests} {
781
regexp -no -- FoO dogfood
784
test regexpComp-21.8 {regexp command compiling tests} {
789
test regexpComp-21.9 {regexp command compiling tests} {
791
list [catch {regexp -- -nocase foo dogfod} msg] $msg
794
test regexpComp-21.10 {regexp command compiling tests} {
796
list [regsub -all "" foo bar str] $str
799
test regexpComp-21.11 {regexp command compiling tests} {
801
list [regsub -all "" "" bar str] $str
806
foreach {str exp result} {
821
test regexpComp-22.[incr i] {regexp command compiling tests} \
822
[subst {evalInProc {set a "$str"; regexp {$exp} \$a}}] $result
826
::tcltest::cleanupTests