~vcs-imports/gawk/master

408.13.98 by Arnold D. Robbins
Add additional fpat test.
1
# Test driver: split each data[i] with fpat[i] twice, once with the
# awk-level mypatsplit() and once with the built-in patsplit(), print
# both results side by side, and exit with status 1 at the first
# difference in field count, field text, or separator text.
BEGIN {
	# boolean constants; mypatsplit() reads these globals
	false = 0
	true = 1

	# Patterns, indexed in parallel with data[]:
	#   1-3  a possibly empty run of non-comma characters, or a
	#        double-quoted string (CSV-like fields)
	#   4-5  two or more consecutive a's
	#   6    one character in the range a-z
	fpat[1] = fpat[2] = fpat[3] = "([^,]*)|(\"[^\"]+\")"
	fpat[4] = fpat[5] = "aa+"
	fpat[6] = "[a-z]"

	# Inputs, indexed in parallel with fpat[]
	data[1] = "Robbins,,Arnold,"
	data[2] = "Smith,,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
	data[3] = "Robbins,Arnold,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
	data[4] = "bbbaaacccdddaaaaaqqqq"
	# the lone trailing a cannot match aa+, so it stays in the final separator
	data[5] = "bbbaaacccdddaaaaaqqqqa"
	data[6] = "aAbBcC"

	i = 1
	while (i in data) {
		printf("Splitting: <%s>\n", data[i])
		n = mypatsplit(data[i], fields, fpat[i], seps)
		m = patsplit(data[i], fields2, fpat[i], seps2)
		print "n =", n, "m =", m
		if (n != m) {
			printf("ERROR: counts wrong!\n") > "/dev/stderr"
			exit 1
		}

		# fields are numbered from 1
		for (j = 1; j <= n; j++) {
			printf("fields[%d] = <%s>\tfields2[%d] = <%s>\n", j, fields[j], j, fields2[j])
			if (fields[j] == fields2[j])
				continue
			printf("ERROR: data %d, field %d mismatch!\n", i, j) > "/dev/stderr"
			exit 1
		}

		# separators are numbered from 0; walk the ones mypatsplit() stored
		for (j = 0; j in seps; j++) {
			printf("seps[%d] = <%s>\tseps2[%d] = <%s>\n", j, seps[j], j, seps2[j])
			if (seps[j] == seps2[j])
				continue
			printf("ERROR: data %d, separator %d mismatch!\n", i, j) > "/dev/stderr"
			exit 1
		}

		i++
	}
}
44
45
# mypatsplit --- awk-level counterpart of the built-in patsplit()
#
# Splits `string' into the pieces that match the regexp `pattern'.
#
#	array	cleared, then filled with the matched fields, array[1..n]
#	seps	cleared, then filled with the text between fields:
#		seps[0] is the text before the first field and seps[k]
#		the text that follows array[k]
#
# Returns n, the number of fields stored (0 for an empty string).
#
# The flags are plain 0/1 values, so the function does not rely on any
# global variable being set up by the caller.
function mypatsplit(string, array, pattern, seps,
			eosflag, non_empty, nf) # locals
{
	delete array
	delete seps
	if (length(string) == 0)
		return 0

	eosflag = 0	# becomes 1 once the remaining input is empty
	non_empty = 0	# 1 if the previous match was non-empty
	nf = 0		# number of fields stored so far
	while (match(string, pattern)) {
		if (RLENGTH > 0) {	# easy case
			non_empty = 1
			# whatever precedes the match is the separator in
			# front of this field, unless one is already recorded
			if (! (nf in seps))
				seps[nf] = (RSTART == 1) ? "" : substr(string, 1, RSTART - 1)
			array[++nf] = substr(string, RSTART, RLENGTH)
			string = substr(string, RSTART + RLENGTH)
			# field ran to the end of the input: stop here so that
			# no extra empty field is appended after it
			if (length(string) == 0)
				break
		} else if (non_empty) {
			# last match was non-empty, and at the
			# current character we get a zero length match,
			# which we don't want, so skip over it;
			# the skipped character becomes the separator
			non_empty = 0
			seps[nf] = substr(string, 1, 1)
			string = substr(string, 2)
		} else {
			# 0 length match that counts as an empty field
			if (! (nf in seps))
				seps[nf] = (RSTART == 1) ? "" : substr(string, 1, RSTART - 1)
			array[++nf] = ""
			# input not yet exhausted: the next character is the
			# separator that follows this empty field
			if (! eosflag)
				seps[nf] = substr(string, 1, 1)
			# step past that character (RSTART + 1 is 2 when the
			# match is at the front of the string)
			string = substr(string, RSTART + 1)
		}
		if (length(string) == 0) {
			# allow exactly one more pass over the empty remainder
			# so that a pattern matching "" can add a final field
			if (eosflag)
				break
			eosflag = 1
		}
	}
	# unmatched text left over is the trailing separator
	if (length(string) > 0)
		seps[nf] = string

	# every array[++nf] above adds one element, so nf is the field count
	return nf
}