2
;; Copyright (c) 2002 by The XFree86 Project, Inc.
4
;; Permission is hereby granted, free of charge, to any person obtaining a
5
;; copy of this software and associated documentation files (the "Software"),
6
;; to deal in the Software without restriction, including without limitation
7
;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
;; and/or sell copies of the Software, and to permit persons to whom the
9
;; Software is furnished to do so, subject to the following conditions:
11
;; The above copyright notice and this permission notice shall be included in
12
;; all copies or substantial portions of the Software.
14
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
;; THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18
;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19
;; OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
;; Except as contained in this notice, the name of the XFree86 Project shall
23
;; not be used in advertising or otherwise to promote the sale, use or other
24
;; dealings in this Software without prior written authorization from the
27
;; Author: Paulo César Pereira de Andrade
30
;; $XFree86: xc/programs/xedit/lisp/test/regex.lsp,v 1.2 2002/12/11 04:44:28 paulo Exp $
33
;; Basic regex tests. This file is only for xedit lisp and for its regex
34
;; library. Note that the regex library used by xedit lisp is not meant
35
;; to be fully compatible with most regexes, but to be as fast as possible.
36
;; This means that some patterns that look basic may never be matched,
37
;; but it is expected that almost any pattern can be rewritten to be
38
;; matched, or in the worst case, it may be required to search in the
39
;; regions matched by a previous regex.
41
;; Test helper: applies RE-EXEC to ARGUMENTS and compares the result with
;; EXPECT.  The visible forms print either an ERROR line or, when the result
;; is not EQUAL to EXPECT, a "should be" mismatch line.
;; NOTE(review): the forms that set `error' and `error-value' are not visible
;; in this copy, and the bare integers look like leftover line numbers --
;; confirm against the upstream file before relying on this definition.
(defun re-test (expect &rest arguments &aux result (error t) unused error-value)
45
;; run the matcher with exactly the arguments given to the test
(setq result (apply #'re-exec arguments))
48
;; error report: shows the call and the value held in error-value
(format t "ERROR: (re-exec~{ ~S~}) => ~S~%" arguments error-value)
49
;; silent when result is EQUAL to expect, otherwise print the mismatch
(or (equal result expect)
50
(format t "(re-exec~{ ~S~}) => should be ~S not ~S~%"
51
arguments expect result))))
53
;; errors only generated for regex compilation (or incorrect arguments)
54
;; Test helper: applies RE-COMP to ARGUMENTS; the visible format call prints
;; a message saying that no error was raised for that call.
;; NOTE(review): the error-trapping forms are not visible in this copy --
;; confirm under which condition the message is printed.
(defun re-error (&rest arguments &aux result (error t) unused error-value)
58
;; compile the pattern with exactly the arguments given to the test
(setq result (apply #'re-comp arguments))
61
(format t "ERROR: no error for (re-comp~{ ~S~})" arguments)))
72
;; Plain literal pattern.  The same span is expected with or without :notbol
;; and :noteol, and the expected offsets stay relative to the whole string
;; when :start/:end restrict the searched region.
(setq re (re-comp "abc"))
73
(re-test '((0 . 3)) re "abc")
74
(re-test '((0 . 3)) re "abc" :notbol t)
75
(re-test '((0 . 3)) re "abc" :noteol t)
76
(re-test '((0 . 3)) re "abc" :notbol t :noteol t)
77
(re-test '((14 . 17)) re "aaaaaaaaaaaaaaabc")
78
(re-test '((14 . 17)) re "aaaaaaaaaaaaaaabc" :start 12 :end 17)
79
(re-test '((30 . 33)) re "xxxxxxxxxxxxxxaaaaaaaaaaaaaaaaabcxx")
80
(re-test '((30 . 33)) re "xxxxxxxxxxxxxxaaaaaaaaaaaaaaaaabcxx" :start 28 :end 34)
82
;; Anchors.  Per the expectations below, ^ matches at the :start offset
;; unless :notbol is given, and $ matches at the :end offset unless :noteol
;; is given.
(setq re (re-comp "^abc"))
83
(re-test '((0 . 3)) re "abc")
84
(re-test :nomatch re "xabc")
85
(re-test '((1 . 4)) re "xabc" :start 1)
86
(re-test :nomatch re "xabc" :start 1 :notbol t)
88
;; end anchor only
(setq re (re-comp "abc$"))
89
(re-test '((0 . 3)) re "abc")
90
(re-test :nomatch re "xabcx")
91
(re-test '((1 . 4)) re "xabcx" :end 4)
92
(re-test :nomatch re "xabc" :end 4 :noteol t)
94
;; both anchors: either :notbol or :noteol alone is enough to prevent a match
(setq re (re-comp "^abc$"))
95
(re-test '((0 . 3)) re "abc")
96
(re-test :nomatch re "xabcx")
97
(re-test '((1 . 4)) re "xabcx" :start 1 :end 4)
98
(re-test :nomatch re "xabcx" :start 1 :end 4 :notbol t)
99
(re-test :nomatch re "xabcx" :start 1 :end 4 :noteol t)
100
(re-test :nomatch re "xabcx" :start 1 :end 4 :notbol t :noteol t)
101
;; :count 0 is expected to give nil rather than a list of spans
(re-test nil re "abc" :count 0)
103
;; Alternation of three literals: the expected span is where one of the
;; alternatives is found in the input.
(setq re (re-comp "abc|bcd|cde"))
104
(re-test '((0 . 3)) re "abc")
105
(re-test '((1 . 4)) re "aabc")
106
(re-test '((3 . 6)) re "xxxbcdef")
107
(re-test '((8 . 11)) re "abdzzzcdabcde")
108
(re-test '((13 . 16)) re "xxxxabdecdabdcde")
110
;; Alternation mixed with anchors: per the expectations, ^ applies only to
;; the abc alternative and $ only to the bcd alternative.
(setq re (re-comp "^abc|bcd$|cde"))
111
(re-test '((0 . 3)) re "abcde")
112
(re-test '((3 . 6)) re "xabcde")
113
(re-test '((1 . 4)) re "xabcde" :start 1)
114
(re-test '((3 . 6)) re "xabcde" :start 1 :notbol t)
115
(re-test '((2 . 5)) re "xabcd")
116
(re-test :nomatch re "xabcd" :noteol t)
117
(re-test nil re "xabcd" :count 0)
118
(re-test :nomatch re "abcdx" :notbol t)
120
;; Optional items (?): three alternatives combined first, then each
;; alternative compiled and tested on its own.
(setq re (re-comp "a?bc|ab?c|abc?"))
121
(re-test '((0 . 3)) re "abc")
122
(re-test :nomatch re "xxxb")
123
(re-test '((3 . 5)) re "xxxbc")
124
(re-test '((5 . 7)) re "sssssab")
125
(re-test '((0 . 3)) re "abcd")
126
(re-test '((1 . 4)) re "aabcdef")
127
(re-test '((1 . 3)) re "aabbccdef") ;; ab matches abc?
129
;; optional first character
(setq re (re-comp "a?bc"))
130
(re-test '((2 . 4)) re "acbcd")
131
(re-test '((2 . 5)) re "acabcd")
133
;; optional middle character
(setq re (re-comp "ab?c"))
134
(re-test '((1 . 3)) re "xacc")
135
(re-test '((2 . 5)) re "xxabcc")
137
;; optional last character
(setq re (re-comp "abc?"))
138
(re-test '((1 . 3)) re "xababc")
139
(re-test '((2 . 5)) re "xxabccabc")
141
;; Zero-or-more (*): three alternatives combined first, then each
;; alternative compiled and tested on its own.
(setq re (re-comp "a*bc|ab*c|abc*"))
142
(re-test '((0 . 9)) re "aaaaaaabc")
143
(re-test '((1 . 10)) re "xaaaaaaabc")
144
(re-test '((3 . 12)) re "xyzaaaaaaabc")
145
(re-test '((0 . 4)) re "abbc")
146
(re-test '((2 . 9)) re "xxabbbbbc")
147
(re-test '((0 . 12)) re "abcccccccccc")
148
(re-test '((0 . 12)) re "abccccccccccd")
149
(re-test '((16 . 29)) re "xxxxxxxaaaaaaaaaabbbbbbbbbbbccccccccccc")
150
(re-test '((11 . 13)) re "xxxbbbbbbbbbc")
151
(re-test '((8 . 10)) re "aaaaazbxacd")
153
;; repeated first character
(setq re (re-comp "a*bc"))
154
(re-test '((2 . 4)) re "acbcd")
155
(re-test '((2 . 5)) re "acabcd")
156
(re-test '((2 . 8)) re "acaaaabcd")
158
;; repeated middle character
(setq re (re-comp "ab*c"))
159
(re-test '((1 . 3)) re "xacc")
160
(re-test '((2 . 5)) re "xxabcc")
161
(re-test '((3 . 8)) re "xxaabbbcc")
163
;; repeated last character
(setq re (re-comp "abc*"))
164
(re-test '((1 . 3)) re "xababc")
165
(re-test '((2 . 5)) re "xxabcbabccc")
166
(re-test '((3 . 7)) re "axxabccabc")
168
;; One-or-more (+) in an alternation.  Every alternative starts with an a,
;; so an input with only bc is expected not to match.
(setq re (re-comp "a+bc|ab+c|abc+"))
169
(re-test :nomatch re "xxxbc")
170
(re-test '((1 . 6)) re "xaaabc")
171
(re-test '((8 . 12)) re "zzzzaaaaabbc")
172
(re-test '((7 . 15)) re "zzzzaaaabbbbbbcccc")
174
;; Dot between two literals: exactly one character is required between the
;; a and the c, so "xac" is expected not to match.
(setq re (re-comp "a.c"))
175
(re-test '((0 . 3)) re "abc")
176
(re-test '((1 . 4)) re "aaac")
177
(re-test :nomatch re "xac")
178
(re-test '((3 . 6)) re "xaxaac")
179
(re-test '((2 . 5)) re "xxabc")
180
(re-test '((3 . 6)) re "acxaxc")
182
;; a*c: a lone c is an acceptable match, and any run of a directly before
;; the c is part of the expected span.
(setq re (re-comp "a*c"))
183
(re-test '((0 . 1)) re "c")
184
(re-test '((5 . 6)) re "xxxxxc")
185
(re-test '((8 . 9)) re "xxxxxxxxc")
186
(re-test '((7 . 8)) re "xxxxxxxcc")
187
(re-test '((0 . 2)) re "ac")
188
(re-test '((0 . 5)) re "aaaac")
189
(re-test '((1 . 3)) re "xac")
190
(re-test '((3 . 6)) re "xxxaac")
191
(re-test '((2 . 4)) re "xxac")
192
(re-test '((4 . 6)) re "xxxxac")
194
;; a+c: at least one a is required before the c
(setq re (re-comp "a+c"))
195
(re-test '((2 . 5)) re "xxaac")
196
(re-test '((3 . 8)) re "xxxaaaac")
197
(re-test '((6 . 8)) re "xaaaabac")
198
(re-test :nomatch re "xxxc")
199
(re-test '((4 . 9)) re "xxxxaaaaccc")
201
;; Interval expressions.  {4}: exactly four a before the b.
(setq re (re-comp "a{4}b"))
202
(re-test '((19 . 24)) re "xabxxaabxxxaaabxxxxaaaab")
203
(re-test '((4 . 9)) re "aaabaaaab")
205
;; {4,}: lower bound only; a longer run of a is expected to match in full
(setq re (re-comp "a{4,}b"))
206
(re-test '((3 . 8)) re "xxxaaaab")
207
(re-test '((8 . 25)) re "zaaabzzzaaaaaaaaaaaaaaaab")
209
;; {,4}: upper bound only; a lone b is accepted, and at most four a are
;; included in the expected span
(setq re (re-comp "a{,4}b"))
210
(re-test '((0 . 1)) re "b")
211
(re-test '((8 . 9)) re "xxxxxxxxb")
212
(re-test '((6 . 11)) re "xaaaaaaaaab")
213
(re-test '((3 . 5)) re "xxxab")
214
(re-test '((6 . 10)) re "aaaaaxaaab")
216
;; {2,4}: both bounds
(setq re (re-comp "a{2,4}b"))
217
(re-test :nomatch re "xab")
218
(re-test '((1 . 4)) re "xaab")
219
(re-test '((1 . 5)) re "xaaab")
220
(re-test '((2 . 7)) re "xxaaaab")
221
(re-test '((4 . 9)) re "xxxaaaaab")
223
;; Groups.  With :count N the expected list holds the whole match first and
;; then the group spans; a :count larger than the number of groups does not
;; add entries.
(setq re (re-comp "foo(bar|baz)fee"))
224
(re-test '((9 . 18)) re "feebarbazfoobarfee")
225
(re-test '((9 . 18) (12 . 15)) re "feebarbazfoobarfee" :count 2)
226
(re-test '((13 . 22)) re "foofooobazfeefoobazfee")
227
(re-test '((13 . 22) (16 . 19)) re "foofooobazfeefoobazfee" :count 3)
229
;; Same pattern compiled with :nosub t: only the whole match is expected,
;; whatever :count says.
(setq re (re-comp "foo(bar|baz)fee" :nosub t))
230
(re-test '((9 . 18)) re "feebarbazfoobarfee")
231
(re-test '((9 . 18)) re "feebarbazfoobarfee" :count 2)
232
(re-test '((13 . 22)) re "foofooobazfeefoobazfee")
233
(re-test '((13 . 22)) re "foofooobazfeefoobazfee" :count 3)
235
;; group followed by a bracket expression
(setq re (re-comp "f(oo|ee)ba[rz]"))
236
(re-test :nomatch re "barfoebaz")
237
(re-test '((3 . 9) (4 . 6)) re "bazfoobar" :count 2)
238
(re-test '((3 . 9) (4 . 6)) re "barfeebaz" :count 2)
240
;; and the :nosub t variant of it
(setq re (re-comp "f(oo|ee)ba[rz]" :nosub t))
241
(re-test :nomatch re "barfoebaz")
242
(re-test '((3 . 9)) re "bazfoobar" :count 2)
243
(re-test '((3 . 9)) re "barfeebaz" :count 2)
245
;; \< and \> around an alternation.  Only the standalone "int" is expected
;; to match, not the one inside "aint" nor the "char" inside "character"
;; (presumably word-boundary assertions -- confirm in the regex library).
;; The group spans the whole match, so both pairs are identical.
(setq re (re-comp "\\<(int|char)\\>"))
246
(re-test '((15 . 18)) re "aint character int foo")
247
(re-test '((15 . 18) (15 . 18)) re "aint character int foo" :count 2)
249
;; :nosub t variant: only the whole match is expected
(setq re (re-comp "\\<(int|char)\\>" :nosub t))
250
(re-test '((15 . 18)) re "aint character int foo" :count 2)
252
;; Dot combined with *, + and ? between literals.  Note that the expected
;; span for foo.*bar is the first, shortest foo...bar stretch, not the
;; longest one available in the input.
(setq re (re-comp "foo.*bar"))
253
(re-test '((11 . 17)) re "barfoblaboofoobarfoobarfoobar")
255
;; .+ needs at least one character between foo and bar
(setq re (re-comp "foo.+bar"))
256
(re-test :nomatch re "foobar")
257
(re-test '((6 . 13)) re "fobbarfooxbarfooybar")
259
;; .? allows at most one character between foo and bar
(setq re (re-comp "foo.?bar"))
260
(re-test '((1 . 7)) re "xfoobar")
261
(re-test :nomatch re "xxfooxxbar")
262
(re-test '((3 . 10)) re "yyyfootbar")
264
;; two wildcard runs in one pattern
(setq re (re-comp "a.*b.*c"))
265
(re-test '((0 . 3)) re "abc")
266
(re-test '((9 . 18)) re "xxxxxxxxxabbbbbbbccaaaaabbbc")
268
(setq re (re-comp "a.+b.*c"))
269
(re-test :nomatch re "xxxabc")
270
(re-test '((2 . 7)) re "xxaxbbc")
272
(setq re (re-comp "a.+b.?c"))
273
(re-test '((1 . 5)) re "xaabc")
274
(re-test '((2 . 7)) re "xxaabbc")
276
;; Groups that contain a wildcard alternative.  With :count 2 the second
;; pair is the span of the group itself.
(setq re (re-comp "(foo.*|bar)fee"))
277
(re-test '((3 . 9) (3 . 6)) re "barfoofee" :count 2)
278
(re-test '((0 . 9) (0 . 6)) re "foobarfee" :count 2)
279
(re-test '((4 . 10) (4 . 7)) re "xxfobarfee" :count 2)
280
(re-test '((3 . 17) (3 . 14)) re "barfooooooobarfee" :count 2)
281
(re-test '((4 . 10) (4 . 7)) re "xxfobarfeefoobar" :count 2)
283
;; foo.+ needs at least one character after foo inside the group
(setq re (re-comp "(foo.+|bar)fee"))
284
(re-test :nomatch re "barfoofee" :count 2)
285
(re-test '((3 . 10) (3 . 7)) re "barfooxfee" :count 2)
287
;; foo.? allows at most one character after foo inside the group
(setq re (re-comp "(foo.?|bar)fee"))
288
(re-test :nomatch re "foobar" :count 2)
289
(re-test '((2 . 8) (2 . 5)) re "bafoofee" :count 2)
290
(re-test '((2 . 9) (2 . 6)) re "bafooofeebarfee" :count 4)
291
(re-test '((2 . 8) (2 . 5)) re "bafoofeebarfee" :count 2)
292
;; :count 0 gives nil, :count 1 gives the whole match only
(re-test nil re "bafoofeebarfee" :count 0)
293
(re-test '((2 . 8)) re "bafoofeebarfee" :count 1)
295
;; Backreferences: \1 and \2 must repeat the text matched by the first and
;; second group.  The expected list is the whole match followed by the group
;; spans, cut short by :count.
(setq re (re-comp "(a|b|c)\\1"))
296
(re-test '((0 . 2) (0 . 1)) re "aa" :count 2)
298
;; two groups, both referenced
(setq re (re-comp "(a|b|c)(a|b|c)\\1\\2"))
299
(re-test '((0 . 4) (0 . 1) (1 . 2)) re "acac" :count 5)
300
(re-test '((4 . 8) (4 . 5) (5 . 6)) re "xxxxacac" :count 4)
301
(re-test '((24 . 28) (24 . 25) (25 . 26)) re "xxacabacbcacbbacbcaaccabcaca" :count 3)
302
(re-test '((4 . 8) (4 . 5) (5 . 6)) re "xyabcccc" :count 3)
303
(re-test '((4 . 8) (4 . 5)) re "xyabcccc" :count 2)
304
(re-test '((4 . 8)) re "xyabcccc" :count 1)
305
(re-test nil re "xyabcccc" :count 0)
307
;; backreference to a group containing a repetition
(setq re (re-comp "(a*b)\\1"))
308
(re-test '((3 . 15) (3 . 9)) re "xxxaaaaabaaaaab" :count 2)
309
(re-test '((7 . 9) (7 . 8)) re "abaabaxbb" :count 2)
311
(setq re (re-comp "(ab+c)\\1"))
312
(re-test '((3 . 13) (3 . 8)) re "xaaabbbcabbbc" :count 3)
314
;; the repeated text must be identical: abc followed by ac is no match
(setq re (re-comp "(ab?c)\\1"))
315
(re-test :nomatch re "abcac" :count 2)
316
(re-test '((4 . 8) (4 . 6)) re "acabacac" :count 2)
317
(re-test '((5 . 11) (5 . 8)) re "abcacabcabc" :count 2)
318
(re-test '((3 . 7) (3 . 5)) re "abcacac" :count 2)
320
;; a group that matches nothing is expected as a zero-width pair
(setq re (re-comp "a(.*)b\\1"))
321
(re-test '((3 . 5) (4 . 4)) re "xxxab" :count 2)
322
(re-test '((4 . 12) (5 . 8)) re "xxxxazzzbzzz" :count 2)
324
;; :icase t -- matches are expected regardless of letter case, for literals,
;; bracket ranges and wildcard patterns alike.
(setq re (re-comp "abc" :icase t))
325
(re-test '((0 . 3)) re "AbC")
327
;; a lowercase range is expected to cover uppercase letters as well
(setq re (re-comp "[0-9][a-z]+" :icase t))
328
(re-test '((3 . 10)) re "xxx0aaZxYT9")
330
(setq re (re-comp "a.b" :icase t))
331
(re-test '((10 . 13)) re "aaaaaaaaaaaxB")
333
(setq re (re-comp "a.*z" :icase t))
334
(re-test '((3 . 9)) re "xxxAaaaaZ")
335
(re-test '((2 . 6)) re "xxaaaZaaa")
337
;; Case-insensitive keyword alternation delimited by \< and \>.
(setq re (re-comp "\\<(lambda|defun|defmacro)\\>" :icase t))
338
;; The expected span (5 . 11) needs five characters before the keyword:
;; four blanks and the open parenthesis.  With a single blank the input is
;; only eight characters long and the expected offsets cannot be reached.
(re-test '((5 . 11)) re "    (lambda")
339
(re-test '((5 . 11) (5 . 11)) re "    (lambda" :count 2)
340
;; misspelled keywords are expected not to match
(re-test :nomatch re "lamda defunn deffmacro")
342
;; Case-insensitive short keywords between \< and \>: "Nil" is expected to
;; match, while the t inside "it" and the nil inside "nilo" are not.
(setq re (re-comp "\\<(nil|t)\\>" :icase t))
343
(re-test '((3 . 6)) re "it Nil")
344
(re-test '((3 . 6) (3 . 6)) re "it Nil" :count 6)
345
(re-test :nomatch re "nilo")
347
;; only the standalone "EnD" is expected, not the prefixes of "beginning"
;; and "ending"
(setq re (re-comp "\\<(begin|end)\\>" :icase t))
348
(re-test '((21 . 24) (21 . 24)) re "beginning the ending EnD" :count 7)
350
(setq re (re-comp "a.*" :newline t))
351
(re-test '((0 . 1)) re "a
353
(re-test '((3 . 4)) re "xyza
356
(setq re (re-comp "a.+" :newline t))
357
(re-test '((2 . 5)) re "a
359
(re-test '((5 . 7)) re "xyza
362
(setq re (re-comp "a.?" :newline t))
363
(re-test '((0 . 1)) re "a
365
(re-test '((3 . 4)) re "xyza
368
(setq re (re-comp "a.*b.*c" :newline t))
369
(re-test '((11 . 14)) re "xxaa
372
(re-test '((6 . 9)) re "xxxab
376
(setq re (re-comp "a.+b.*c" :newline t))
377
(re-test '((6 . 10)) re "ab
381
(setq re (re-comp "a.?b.*c" :newline t))
382
(re-test '((4 . 8)) re "ab
386
(setq re (re-comp "^foo$" :newline t))
387
(re-test '((11 . 14)) re "bar
390
(re-test '((0 . 3)) re "foo
394
(re-test '((8 . 11)) re "foo
398
(re-test '((8 . 11)) re "foo
401
(re-test :nomatch re "foo
403
foo" :notbol t :noteol t)
405
(setq re (re-comp "^\\s*#\\s*(define|include)\\s+.+" :newline t))
406
(re-test '((8 . 18)) re "#define
408
(re-test '((8 . 18) (9 . 16)) re "#define
409
#include x" :count 2)
411
(setq re (re-comp "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"))
412
(re-test '((3 . 259)) re "zzzxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxzzz")
414
(setq re (re-comp "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~"))
415
(re-test '((13 . 333)) re "String here: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~/")
417
;; Two groups separated by \D, the second made of \d+ (presumably non-digit
;; and digit classes -- confirm in the regex library).  The expected list is
;; cut short by :count, and repeated input still yields the first occurrence.
(setq re (re-comp "(.*)\\D(\\d+)"))
418
(re-test '((0 . 6) (0 . 3) (4 . 6)) re "abcW12" :count 3)
419
(re-test '((0 . 6) (0 . 3)) re "abcW12" :count 2)
420
(re-test '((0 . 6)) re "abcW12" :count 1)
421
(re-test nil re "abcW12" :count 0)
422
(re-test '((0 . 6) (0 . 3) (4 . 6)) re "abcW12abcW12" :count 3)
423
(re-test '((0 . 6) (0 . 3) (4 . 6)) re "abcW12abcW12a" :count 3)
425
;; .* followed by \d: as the inline notes say, only the minimal match is
;; expected, i.e. the span ends at the first \d match in the input.
(setq re (re-comp ".*\\d"))
426
(re-test '((0 . 2)) re "a1a1a1aaaaaaa") ; minimal match only
428
;; same, with the wildcard part captured
(setq re (re-comp "(.*)\\d"))
429
(re-test '((0 . 2) (0 . 1)) re "a1a1a1aaaaaaa" :count 2); minimal match only
431
;; same, with the \d part captured
(setq re (re-comp ".*(\\d)"))
432
(re-test '((0 . 2) (1 . 2)) re "a1a1a1aaaaaaa" :count 2); minimal match only
434
;; XXX this very simple pattern was entering an infinite loop
435
;; actually, this pattern is not supported, just test that it is not
436
;; crashing (not supported because it is not cheap to match variations
438
;; Optional group wrapping .*a: the expectation is the minimal match, one
;; character long, with the group spanning the same single character.
(setq re (re-comp "(.*a)?"))
439
(re-test '((0 . 1)) re "aaaa") ; expected, minimal match
440
(re-test '((0 . 1) (0 . 1)) re "aaaa" :count 2)