408.13.98
by Arnold D. Robbins
Add additional fpat test.
1 |
# Driver: split every sample record twice -- once with the awk-level
# mypatsplit() and once with the built-in patsplit() -- print what each
# produced, and exit with status 1 at the first disagreement.
BEGIN {
    # Boolean constants; mypatsplit() reads these globals.
    false = 0
    true = 1

    # fpat[k] is the field pattern applied to data[k].
    fpat[1] = fpat[2] = fpat[3] = "([^,]*)|(\"[^\"]+\")"
    fpat[4] = fpat[5] = "aa+"
    fpat[6] = "[a-z]"

    # Sample records: three comma-separated lines, two runs-of-"a" strings,
    # and one mixed-case string.
    data[1] = "Robbins,,Arnold,"
    data[2] = "Smith,,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
    data[3] = "Robbins,Arnold,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
    data[4] = "bbbaaacccdddaaaaaqqqq"
    data[5] = "bbbaaacccdddaaaaaqqqqa"    # ends with a lone "a": trailing separator is qqqqa
    data[6] = "aAbBcC"

    i = 1
    while (i in data) {
        printf("Splitting: <%s>\n", data[i])
        n = mypatsplit(data[i], fields, fpat[i], seps)
        m = patsplit(data[i], fields2, fpat[i], seps2)
        print "n =", n, "m =", m

        # The two implementations must agree on the number of fields ...
        if (n != m) {
            printf("ERROR: counts wrong!\n") > "/dev/stderr"
            exit 1
        }

        # ... on every field value ...
        j = 1
        while (j <= n) {
            printf("fields[%d] = <%s>\tfields2[%d] = <%s>\n", j, fields[j], j, fields2[j])
            if (! (fields[j] == fields2[j])) {
                printf("ERROR: data %d, field %d mismatch!\n", i, j) > "/dev/stderr"
                exit 1
            }
            j++
        }

        # ... and on every separator that mypatsplit() recorded,
        # walking consecutive indices starting at 0.
        j = 0
        while (j in seps) {
            printf("seps[%d] = <%s>\tseps2[%d] = <%s>\n", j, seps[j], j, seps2[j])
            if (! (seps[j] == seps2[j])) {
                printf("ERROR: data %d, separator %d mismatch!\n", i, j) > "/dev/stderr"
                exit 1
            }
            j++
        }

        i++
    }
}
|
|
44 |
||
45 |
# mypatsplit --- pattern-based splitter written in plain awk with match()
#
# Clears `array' and `seps', then repeatedly applies match(string, pattern)
# to the unconsumed remainder of `string':
#   array[1..N]  receives the matched pieces, in order (a piece may be "");
#   seps[0]      receives the text ahead of the first piece;
#   seps[k]      receives text found after piece k, when there is any.
# Returns the number of elements stored in `array' (0 for an empty string).
#
# Uses the global variables `true' and `false' as its boolean values.
function mypatsplit(string, array, pattern, seps,
            hit_end, prev_matched, count)    # locals
{
    delete array
    delete seps
    if (! length(string))
        return 0

    hit_end = prev_matched = false
    count = 0
    while (match(string, pattern)) {
        if (RLENGTH > 0) {
            # Non-empty match: the straightforward case.
            prev_matched = true
            # Whatever precedes the match is the separator, unless one
            # was already stored for this slot.  substr(string, 1, 0)
            # is "", so a match at the very front needs no special case.
            if (! (count in seps))
                seps[count] = substr(string, 1, RSTART - 1)
            array[++count] = substr(string, RSTART, RLENGTH)
            string = substr(string, RSTART + RLENGTH)
            if (! length(string))
                break
        } else if (prev_matched) {
            # Zero-length match directly after a non-empty one: it is
            # not a field, so consume one character as the separator.
            prev_matched = false
            seps[count] = substr(string, 1, 1)
            string = substr(string, 2)
        } else {
            # Zero-length match that does count as an (empty) field.
            if (! (count in seps))
                seps[count] = substr(string, 1, RSTART - 1)
            array[++count] = ""
            # The character being stepped over becomes the separator
            # after the empty field, unless the end of the string has
            # already been seen.
            if (! hit_end)
                seps[count] = substr(string, 1, 1)
            string = substr(string, RSTART + 1)
        }

        # Allow exactly one more pass after the string runs out, so an
        # empty match at end-of-string can still be considered.
        if (! length(string)) {
            if (hit_end)
                break
            hit_end = true
        }
    }

    # Anything left unmatched is the trailing separator.
    if (length(string))
        seps[count] = string

    return length(array)
}
|