1
/-- This set of tests checks the API, internals, and non-Perl stuff for UTF-8
2
support, excluding Unicode properties. --/
5
------------------------------------------------------------------
10
------------------------------------------------------------------
11
Capturing subpattern count = 0
17
------------------------------------------------------------------
22
------------------------------------------------------------------
23
Capturing subpattern count = 0
29
------------------------------------------------------------------
34
------------------------------------------------------------------
35
Capturing subpattern count = 0
41
------------------------------------------------------------------
46
------------------------------------------------------------------
47
Capturing subpattern count = 0
53
------------------------------------------------------------------
58
------------------------------------------------------------------
59
Capturing subpattern count = 0
65
------------------------------------------------------------------
70
------------------------------------------------------------------
71
Capturing subpattern count = 0
77
------------------------------------------------------------------
82
------------------------------------------------------------------
83
Capturing subpattern count = 0
89
------------------------------------------------------------------
94
------------------------------------------------------------------
95
Capturing subpattern count = 0
101
------------------------------------------------------------------
106
------------------------------------------------------------------
107
Capturing subpattern count = 0
1
/-- This set of tests checks the API, internals, and non-Perl stuff for UTF
2
support, excluding Unicode properties. However, tests that give different
3
results in 8-bit and 16-bit modes are excluded (see tests 16 and 17). --/
6
Failed: character value in \x{...} sequence is too large at offset 9
113
9
Failed: character value in \x{...} sequence is too large at offset 11
116
12
Failed: character value in \x{...} sequence is too large at offset 12
15
Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
18
Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
118
24
/^\x{100}a\x{1234}/8
119
25
\x{100}a\x{1234}bcd
120
26
0: \x{100}a\x{1234}
123
------------------------------------------------------------------
128
------------------------------------------------------------------
129
Capturing subpattern count = 0
135
------------------------------------------------------------------
140
------------------------------------------------------------------
141
Capturing subpattern count = 0
146
28
/\x{0041}\x{2262}\x{0391}\x{002e}/DZ8
147
29
------------------------------------------------------------------
152
34
------------------------------------------------------------------
153
35
Capturing subpattern count = 0
157
39
\x{0041}\x{2262}\x{0391}\x{002e}
158
40
0: A\x{2262}\x{391}.
160
/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
161
------------------------------------------------------------------
163
\x{d55c}\x{ad6d}\x{c5b4}
166
------------------------------------------------------------------
167
Capturing subpattern count = 0
171
\x{D55c}\x{ad6d}\x{C5B4}
172
0: \x{d55c}\x{ad6d}\x{c5b4}
174
/\x{65e5}\x{672c}\x{8a9e}/DZ8
175
------------------------------------------------------------------
177
\x{65e5}\x{672c}\x{8a9e}
180
------------------------------------------------------------------
181
Capturing subpattern count = 0
185
\x{65e5}\x{672c}\x{8a9e}
186
0: \x{65e5}\x{672c}\x{8a9e}
189
------------------------------------------------------------------
194
------------------------------------------------------------------
195
Capturing subpattern count = 0
201
------------------------------------------------------------------
206
------------------------------------------------------------------
207
Capturing subpattern count = 0
213
------------------------------------------------------------------
218
------------------------------------------------------------------
219
Capturing subpattern count = 0
225
------------------------------------------------------------------
230
------------------------------------------------------------------
231
Capturing subpattern count = 0
237
------------------------------------------------------------------
242
------------------------------------------------------------------
243
Capturing subpattern count = 0
249
43
------------------------------------------------------------------
353
------------------------------------------------------------------
355
[\x00-`c-\xbf\xf1-\xff] (neg)
358
------------------------------------------------------------------
359
Capturing subpattern count = 0
363
Subject length lower bound = 1
364
Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
365
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
366
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
367
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
368
Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
369
\xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
370
\xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
371
\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
372
\xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
390
------------------------------------------------------------------
396
------------------------------------------------------------------
397
Capturing subpattern count = 0
401
Subject length lower bound = 3
402
No set of starting bytes
403
\x{100}\x{100}\x{100}\x{100\x{100}
404
0: \x{100}\x{100}\x{100}
407
------------------------------------------------------------------
416
------------------------------------------------------------------
417
Capturing subpattern count = 1
421
Subject length lower bound = 1
422
Starting byte set: x \xc4
425
------------------------------------------------------------------
435
------------------------------------------------------------------
436
Capturing subpattern count = 1
440
Subject length lower bound = 1
441
Starting byte set: a x \xc4
443
/(\x{100}{0,2}a|x)/8SDZ
444
------------------------------------------------------------------
454
------------------------------------------------------------------
455
Capturing subpattern count = 1
459
Subject length lower bound = 1
460
Starting byte set: a x \xc4
462
/(\x{100}{1,2}a|x)/8SDZ
463
------------------------------------------------------------------
474
------------------------------------------------------------------
475
Capturing subpattern count = 1
479
Subject length lower bound = 1
480
Starting byte set: x \xc4
482
122
/\x{100}*(\d+|"(?1)")/8
607
------------------------------------------------------------------
608
Capturing subpattern count = 0
614
------------------------------------------------------------------
619
------------------------------------------------------------------
620
Capturing subpattern count = 0
626
------------------------------------------------------------------
628
[\x00-\xc3\xc5-\xff] (neg)
631
------------------------------------------------------------------
632
Capturing subpattern count = 0
638
------------------------------------------------------------------
643
------------------------------------------------------------------
644
Capturing subpattern count = 0
207
------------------------------------------------------------------
208
Capturing subpattern count = 0
658
214
------------------------------------------------------------------
797
Failed: invalid UTF-8 string at offset 2
800
Failed: invalid UTF-8 string at offset 1
803
Failed: invalid UTF-8 string at offset 1
806
------------------------------------------------------------------
808
\X{c0}\X{c0}\X{c0}xxx
811
------------------------------------------------------------------
812
Capturing subpattern count = 0
813
Options: utf8 no_utf8_check
844
\xfc\x83\x80\x80\x80\x80
846
\xfe\x80\x80\x80\x80\x80
848
\xff\x80\x80\x80\x80\x80
864
\xfc\x84\x80\x80\x80\x80
866
\xfd\x83\x80\x80\x80\x80
868
\?\xf8\x88\x80\x80\x80
870
\?\xf9\x87\x80\x80\x80
872
\?\xfc\x84\x80\x80\x80\x80
874
\?\xfd\x83\x80\x80\x80\x80
877
/\x{100}abc(xyz(?1))/8DZ
878
------------------------------------------------------------------
889
------------------------------------------------------------------
890
Capturing subpattern count = 1
895
326
/[^\x{100}]abc(xyz(?1))/8DZ
896
327
------------------------------------------------------------------
1159
557
------------------------------------------------------------------
1160
558
Capturing subpattern count = 0
1165
/\x{100}+\x{200}/8DZ
1166
------------------------------------------------------------------
1172
------------------------------------------------------------------
1173
Capturing subpattern count = 0
1179
------------------------------------------------------------------
1185
------------------------------------------------------------------
1186
Capturing subpattern count = 0
1192
------------------------------------------------------------------
1198
------------------------------------------------------------------
1199
Capturing subpattern count = 0
1204
563
/()()()()()()()()()()
1205
564
()()()()()()()()()()
1206
565
()()()()()()()()()()
2092
1403
Partial match: X
2095
Capturing subpattern count = 0
2099
Subject length lower bound = 1
2100
Starting byte set: \x09 \x20 \xa0
2103
Capturing subpattern count = 0
2107
Subject length lower bound = 1
2108
Starting byte set: \x09 \x20 \xc2 \xe1 \xe2 \xe3
2129
Capturing subpattern count = 0
2133
Subject length lower bound = 1
2134
Starting byte set: \x0a \x0b \x0c \x0d \x85
2137
Capturing subpattern count = 0
2141
Subject length lower bound = 1
2142
Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2
2157
Capturing subpattern count = 0
2161
Subject length lower bound = 2
2162
Starting byte set: \x0a \x0b \x0c \x0d \x85
2165
Capturing subpattern count = 0
2169
Subject length lower bound = 2
2170
Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2
2173
Capturing subpattern count = 0
2177
Subject length lower bound = 1
2178
Starting byte set: \x09 \x20 A \xc2 \xe1 \xe2 \xe3
2183
Capturing subpattern count = 0
2187
Subject length lower bound = 2
2188
Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2
2191
Capturing subpattern count = 0
2195
Subject length lower bound = 4
2196
Starting byte set: \x09 \x0a \x0c \x0d \x20 x
2199
1406
AB\x{85}xxx\x{a0}XYZ
2200
1407
0: \x{85}xxx\x{a0}
2201
1408
AB\x{a0}xxx\x{85}XYZ
2202
1409
0: \x{a0}xxx\x{85}
2205
Capturing subpattern count = 0
2209
Subject length lower bound = 5
2210
Starting byte set: \x09 \x0a \x0c \x0d \x20 \xc2
2211
AB\x{85}xxx\x{a0}XYZ
2213
AB\x{a0}xxx\x{85}XYZ
2218
1413
0: \x{a2} \x{84}
2221
Capturing subpattern count = 0
2225
Subject length lower bound = 3
2226
Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
2227
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
2228
\x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
2229
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
2230
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3
2231
\xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
2232
\xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
2233
\xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
2234
\xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
2240
1415
'A#хц'8x<any>BZ
2241
1416
------------------------------------------------------------------
2299
1474
------------------------------------------------------------------
2316
1477
Failed: \c must be followed by an ASCII character at offset 3
1485
0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
1486
1: \x{0d}\x{0d}\x{0a}\x{0a}
1489
0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
1490
1: \x{0d}\x{0d}\x{0a}\x{0a}
1499
0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
1503
0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
1508
Capturing subpattern count = 0
1509
Options: caseless utf
1512
Subject length lower bound = 1
1513
No set of starting bytes
1516
Capturing subpattern count = 0
1517
Options: caseless utf
1520
Subject length lower bound = 1
1521
No set of starting bytes
1524
Capturing subpattern count = 0
1525
Options: caseless utf
1528
Subject length lower bound = 1
1529
No set of starting bytes
1531
/[^\x{1234}]{2}/iS8I
1532
Capturing subpattern count = 0
1533
Options: caseless utf
1536
Subject length lower bound = 2
1537
No set of starting bytes
1539
//<bsr_anycrlf><bsr_unicode>
1540
Failed: inconsistent NEWLINE options at offset 0
1561
Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
1564
Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
1567
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
1568
0: \x{1680}\x{2000}\x{202f}\x{3000}
1569
\x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
1570
0: \x{200a}\x{a0}\x{2000}
1573
------------------------------------------------------------------
1575
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{e000}]+
1578
------------------------------------------------------------------
1579
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
1580
0: \x{1680}\x{2000}\x{202f}\x{3000}
1581
\x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
1582
0: \x{200a}\x{a0}\x{2000}
1585
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
1586
0: \x{167f}\x{1681}\x{180d}\x{180f}
1587
\x{2000}\x{200a}\x{1fff}\x{200b}
1589
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
1590
0: \x{202e}\x{2030}\x{205e}\x{2060}
1591
\x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
1592
0: \x{9f}\x{a1}\x{2fff}\x{3001}
1595
------------------------------------------------------------------
1597
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]+
1600
------------------------------------------------------------------
1601
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
1602
0: \x{167f}\x{1681}\x{180d}\x{180f}
1603
\x{2000}\x{200a}\x{1fff}\x{200b}
1605
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
1606
0: \x{202e}\x{2030}\x{205e}\x{2060}
1607
\x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
1608
0: \x{9f}\x{a1}\x{2fff}\x{3001}
1611
\x{2027}\x{2030}\x{2028}\x{2029}
1613
\x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
1614
0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d}
1617
------------------------------------------------------------------
1619
[\x0a-\x0d\x85\x{2028}-\x{2029}\x{e000}]+
1622
------------------------------------------------------------------
1623
\x{2027}\x{2030}\x{2028}\x{2029}
1625
\x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
1626
0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d}
1629
\x{2028}\x{2029}\x{2027}\x{2030}
1631
\x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
1632
0: \x{09}\x{0e}\x{84}\x{86}
1635
------------------------------------------------------------------
1637
[\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]+
1640
------------------------------------------------------------------
1641
\x{2028}\x{2029}\x{2027}\x{2030}
1643
\x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
1644
0: \x{09}\x{0e}\x{84}\x{86}
1647
\x{2027}\x{2030}\x{2028}\x{2029}
1649
\x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
1650
0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d}
2318
1652
/-- End of testinput5 --/