~vcs-imports/gawk/master

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# From arnold  Thu May  9 17:27:03 2002
# Return-Path: <arnold@skeeve.com>
# Received: (from arnold@localhost)
# 	by skeeve.com (8.11.6/8.11.6) id g49ER3K27925
# 	for arnold; Thu, 9 May 2002 17:27:03 +0300
# Date: Thu, 9 May 2002 17:27:03 +0300
# From: Aharon Robbins <arnold@skeeve.com>
# Message-Id: <200205091427.g49ER3K27925@skeeve.com>
# To: arnold@skeeve.com
# Subject: fixme
# X-SpamBouncer: 1.4 (10/07/01)
# X-SBRule: Pattern Match (Other Patterns) (Score: 4850)
# X-SBRule: Pattern Match (Spam Phone #) (Score: 0)
# X-SBClass: Blocked
# Status: RO
# 
# Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail
# From: laura@madonnaweb.com (laura fairhead)
# Newsgroups: comp.lang.awk
# Subject: bug in gawk3.1.0 regex code
# Date: Wed, 08 May 2002 23:31:40 GMT
# Organization: that'll be the daewooo :)
# Lines: 211
# Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE>
# Reply-To: laura@madonnaweb.com
# NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48)
# X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286])
# X-Newsreader: Forte Free Agent 1.21/32.243
# Xref: dfw-artgen comp.lang.awk:13059
# 
# 
# I believe I've just found a bug in gawk3.1.0 implementation of
# extended regular expressions. It seems to be down to the alternation
# operator; when using an end anchor '$' as a subexpression in an
# alternation and the entire matched RE is a nul-string it fails
# to match the end of string, for example;
# 
# gsub(/$|2/,"x")
# print
# 
# input           = 12345
# expected output = 1x345x
# actual output   = 1x345
# 
# The start anchor '^' always works as expected;
# 
# gsub(/^|2/,"x")
# print
# 
# input           = 12345
# expected output = x1x345
# actual output   = x1x345
# 
# This was with POSIX compliance enabled, although that doesn't
# affect the result.
# 
# I checked on gawk3.0.6 and got exactly the same results however
# gawk2.15.6 gives the expected results.
# 
# I'm about to post a bug report about this into gnu.utils.bug
# but I thought I'd post it here first in case anyone has
# any input/comments/whatever ....
# 
# Complete test results were as follows;
# 
# input          12345
# output         gsub(/regex/,"x",input)
# 
# regex          output
# (^)            x12345
# ($)            12345x
# (^)|($)        x12345x
# ($)|(^)        x12345x
# (2)            1x345
# (^)|2          x1x345
# 2|(^)          x1x345
# ($)|2          1x345
# 2|($)          1x345
# (2)|(^)        x1x345
# (^)|(2)        x1x345
# (2)|($)        1x345
# ($)|(2)        1x345
# .((2)|(^))     x345
# .((^)|(2))     x345
# .((2)|($))     x34x
# .(($)|(2))     x34x
# x{0}((2)|(^))  x1x345
# x{0}((^)|(2))  x1x345
# x{0}((2)|($))  1x345
# x{0}(($)|(2))  1x345
# x*((2)|(^))    x1x345
# x*((^)|(2))    x1x345
# x*((2)|($))    1x345
# x*(($)|(2))    1x345
# 
# Here's the test program I used. A few of the cases use ERE {n[,[m]]}
# operators, so those will have to be commented out or have a check
# added or something (should have put a conditional in I know... ;-)
# 
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 
BEGIN{

TESTSTR="12345"

print "input          "TESTSTR
print "output         gsub(/regex/,\"x\",input)"
print ""

print "regex          output"
$0=TESTSTR
gsub(/(^)/,"x")
print "(^)            "$0

$0=TESTSTR
gsub(/($)/,"x")
print "($)            "$0

$0=TESTSTR
gsub(/(^)|($)/,"x")
print "(^)|($)        "$0

$0=TESTSTR
gsub(/($)|(^)/,"x")
print "($)|(^)        "$0

$0=TESTSTR
gsub(/2/,"x")
print "(2)            "$0

$0=TESTSTR
gsub(/(^)|2/,"x")
print "(^)|2          "$0

$0=TESTSTR
gsub(/2|(^)/,"x")
print "2|(^)          "$0

$0=TESTSTR
gsub(/($)|2/,"x")
print "($)|2          "$0

$0=TESTSTR
gsub(/2|($)/,"x")
print "2|($)          "$0

$0=TESTSTR
gsub(/(2)|(^)/,"x")
print "(2)|(^)        "$0

$0=TESTSTR
gsub(/(^)|(2)/,"x")
print "(^)|(2)        "$0

$0=TESTSTR
gsub(/(2)|($)/,"x")
print "(2)|($)        "$0

$0=TESTSTR
gsub(/($)|(2)/,"x")
print "($)|(2)        "$0

$0=TESTSTR
gsub(/.((2)|(^))/,"x")
print ".((2)|(^))     "$0

$0=TESTSTR
gsub(/.((^)|(2))/,"x")
print ".((^)|(2))     "$0

$0=TESTSTR
gsub(/.((2)|($))/,"x")
print ".((2)|($))     "$0

$0=TESTSTR
gsub(/.(($)|(2))/,"x")
print ".(($)|(2))     "$0

$0=TESTSTR
gsub(/x{0}((2)|(^))/,"x")
print "x{0}((2)|(^))  "$0

$0=TESTSTR
gsub(/x{0}((^)|(2))/,"x")
print "x{0}((^)|(2))  "$0

$0=TESTSTR
gsub(/x{0}((2)|($))/,"x")
print "x{0}((2)|($))  "$0

$0=TESTSTR
gsub(/x{0}(($)|(2))/,"x")
print "x{0}(($)|(2))  "$0

$0=TESTSTR
gsub(/x*((2)|(^))/,"x")
print "x*((2)|(^))    "$0

$0=TESTSTR
gsub(/x*((^)|(2))/,"x")
print "x*((^)|(2))    "$0

$0=TESTSTR
gsub(/x*((2)|($))/,"x")
print "x*((2)|($))    "$0

$0=TESTSTR
gsub(/x*(($)|(2))/,"x")
print "x*(($)|(2))    "$0

$0=TESTSTR
gsub(/x{0}^/,"x")
print "x{0}^          "$0

$0=TESTSTR
gsub(/x{0}$/,"x")
print "x{0}$          "$0

$0=TESTSTR
gsub(/(x{0}^)|2/,"x")
print "(x{0}^)|2      "$0

$0=TESTSTR
gsub(/(x{0}$)|2/,"x")
print "(x{0}$)|2      "$0


}
# 
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 
# byefrom
# 
# -- 
# laura fairhead  # laura@madonnaweb.com  http://lf.8k.com
#                 # if you are bored crack my sig.
# 1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
# EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
# 630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
# 8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
# 80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492
#