1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
BEGIN {
false = 0
true = 1
fpat[1] = "([^,]*)|(\"[^\"]+\")"
fpat[2] = fpat[1]
fpat[3] = fpat[1]
fpat[4] = "aa+"
fpat[5] = fpat[4]
fpat[6] = "[a-z]"
data[1] = "Robbins,,Arnold,"
data[2] = "Smith,,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
data[3] = "Robbins,Arnold,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
data[4] = "bbbaaacccdddaaaaaqqqq"
data[5] = "bbbaaacccdddaaaaaqqqqa" # should get trailing qqqa
data[6] = "aAbBcC"
for (i = 1; i in data; i++) {
printf("Splitting: <%s>\n", data[i])
n = mypatsplit(data[i], fields, fpat[i], seps)
m = patsplit(data[i], fields2, fpat[i], seps2)
print "n =", n, "m =", m
if (n != m) {
printf("ERROR: counts wrong!\n") > "/dev/stderr"
exit 1
}
for (j = 1; j <= n; j++) {
printf("fields[%d] = <%s>\tfields2[%d] = <%s>\n", j, fields[j], j, fields2[j])
if (fields[j] != fields2[j]) {
printf("ERROR: data %d, field %d mismatch!\n", i, j) > "/dev/stderr"
exit 1
}
}
for (j = 0; j in seps; j++) {
printf("seps[%d] = <%s>\tseps2[%d] = <%s>\n", j, seps[j], j, seps2[j])
if (seps[j] != seps2[j]) {
printf("ERROR: data %d, separator %d mismatch!\n", i, j) > "/dev/stderr"
exit 1
}
}
}
}
function mypatsplit(string, array, pattern, seps,
eosflag, non_empty, nf) # locals
{
delete array
delete seps
if (length(string) == 0)
return 0
eosflag = non_empty = false
nf = 0
while (match(string, pattern)) {
if (RLENGTH > 0) { # easy case
non_empty = true
if (! (nf in seps)) {
if (RSTART == 1) # match at front of string
seps[nf] = ""
else
seps[nf] = substr(string, 1, RSTART - 1)
}
array[++nf] = substr(string, RSTART, RLENGTH)
string = substr(string, RSTART+RLENGTH)
if (length(string) == 0)
break
} else if (non_empty) {
# last match was non-empty, and at the
# current character we get a zero length match,
# which we don't want, so skip over it
non_empty = false
seps[nf] = substr(string, 1, 1)
string = substr(string, 2)
} else {
# 0 length match
if (! (nf in seps)) {
if (RSTART == 1)
seps[nf] = ""
else
seps[nf] = substr(string, 1, RSTART - 1)
}
array[++nf] = ""
if (! non_empty && ! eosflag) { # prev was empty
seps[nf] = substr(string, 1, 1)
}
if (RSTART == 1) {
string = substr(string, 2)
} else {
string = substr(string, RSTART + 1)
}
non_empty = false
}
if (length(string) == 0) {
if (eosflag)
break
else
eosflag = true
}
}
if (length(string) > 0)
seps[nf] = string
return length(array)
}
|