281.1.1
by john haque
Speed/memory performance improvements. |
1 |
/*
|
2 |
* cint_array.c - routines for arrays of (mostly) consecutive positive integer indices.
|
|
3 |
*/
|
|
4 |
||
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
5 |
/*
|
1720.1.17
by Arnold D. Robbins
Update copyright year in a bunch of files. |
6 |
* Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2019-2022,
|
408.19.184
by Arnold D. Robbins
Changes toward release and test tarball. |
7 |
* the Free Software Foundation, Inc.
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
8 |
*
|
281.1.1
by john haque
Speed/memory performance improvements. |
9 |
* This file is part of GAWK, the GNU implementation of the
|
10 |
* AWK Programming Language.
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
11 |
*
|
281.1.1
by john haque
Speed/memory performance improvements. |
12 |
* GAWK is free software; you can redistribute it and/or modify
|
13 |
* it under the terms of the GNU General Public License as published by
|
|
14 |
* the Free Software Foundation; either version 3 of the License, or
|
|
15 |
* (at your option) any later version.
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
16 |
*
|
281.1.1
by john haque
Speed/memory performance improvements. |
17 |
* GAWK is distributed in the hope that it will be useful,
|
18 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
20 |
* GNU General Public License for more details.
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
21 |
*
|
281.1.1
by john haque
Speed/memory performance improvements. |
22 |
* You should have received a copy of the GNU General Public License
|
23 |
* along with this program; if not, write to the Free Software
|
|
24 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|
25 |
*/
|
|
26 |
||
27 |
#include "awk.h" |
|
28 |
||
319.1.116
by Arnold D. Robbins
Clean up of awk.h. |
29 |
#define INT32_BIT 32
|
30 |
||
281.1.1
by john haque
Speed/memory performance improvements. |
31 |
extern FILE *output_fp; |
32 |
extern void indent(int indent_level); |
|
33 |
extern NODE **is_integer(NODE *symbol, NODE *subs); |
|
34 |
||
35 |
/*
|
|
36 |
* NHAT --- maximum size of a leaf array (2^NHAT).
|
|
37 |
* THRESHOLD --- Maximum capacity waste; THRESHOLD >= 2^(NHAT + 1).
|
|
38 |
*/
|
|
39 |
||
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
40 |
static int NHAT = 10; |
281.1.1
by john haque
Speed/memory performance improvements. |
41 |
static long THRESHOLD; |
42 |
||
321
by john haque
Polish array handling code. |
43 |
/*
|
44 |
* What is the optimium NHAT ? timing results suggest that 10 is a good choice,
|
|
281.1.1
by john haque
Speed/memory performance improvements. |
45 |
* although differences aren't that significant for > 10.
|
46 |
*/
|
|
47 |
||
48 |
||
49 |
static NODE **cint_array_init(NODE *symbol, NODE *subs); |
|
50 |
static NODE **is_uinteger(NODE *symbol, NODE *subs); |
|
51 |
static NODE **cint_lookup(NODE *symbol, NODE *subs); |
|
52 |
static NODE **cint_exists(NODE *symbol, NODE *subs); |
|
53 |
static NODE **cint_clear(NODE *symbol, NODE *subs); |
|
54 |
static NODE **cint_remove(NODE *symbol, NODE *subs); |
|
55 |
static NODE **cint_list(NODE *symbol, NODE *t); |
|
56 |
static NODE **cint_copy(NODE *symbol, NODE *newsymb); |
|
57 |
static NODE **cint_dump(NODE *symbol, NODE *ndump); |
|
58 |
#ifdef ARRAYDEBUG
|
|
59 |
static void cint_print(NODE *symbol); |
|
60 |
#endif
|
|
61 |
||
1051
by Andrew J. Schorr
Use a struct instead of an array to contain the array methods. |
62 |
const array_funcs_t cint_array_func = { |
63 |
"cint", |
|
281.1.1
by john haque
Speed/memory performance improvements. |
64 |
cint_array_init, |
65 |
is_uinteger, |
|
66 |
cint_lookup, |
|
67 |
cint_exists, |
|
68 |
cint_clear, |
|
69 |
cint_remove, |
|
70 |
cint_list, |
|
71 |
cint_copy, |
|
72 |
cint_dump, |
|
322
by john haque
Improve array interface. |
73 |
(afunc_t) 0, |
281.1.1
by john haque
Speed/memory performance improvements. |
74 |
};
|
75 |
||
1146.1.22
by Arnold D. Robbins
Improve sandbox mode. |
76 |
|
77 |
static NODE **argv_store(NODE *symbol, NODE *subs); |
|
78 |
||
79 |
/* special case for ARGV in sandbox mode */
|
|
1472.3.46
by Arnold D. Robbins
Make a number of variables and functions be static. |
80 |
static const array_funcs_t argv_array_func = { |
1146.1.22
by Arnold D. Robbins
Improve sandbox mode. |
81 |
"argv", |
82 |
cint_array_init, |
|
83 |
is_uinteger, |
|
84 |
cint_lookup, |
|
85 |
cint_exists, |
|
86 |
cint_clear, |
|
87 |
cint_remove, |
|
88 |
cint_list, |
|
89 |
cint_copy, |
|
90 |
cint_dump, |
|
91 |
argv_store, |
|
92 |
};
|
|
93 |
||
281.1.1
by john haque
Speed/memory performance improvements. |
94 |
static inline int cint_hash(long k); |
95 |
static inline NODE **cint_find(NODE *symbol, long k, int h1); |
|
96 |
||
97 |
static inline NODE *make_node(NODETYPE type); |
|
98 |
||
99 |
static NODE **tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base); |
|
100 |
static NODE **tree_exists(NODE *tree, long k); |
|
101 |
static void tree_clear(NODE *tree); |
|
102 |
static int tree_remove(NODE *symbol, NODE *tree, long k); |
|
103 |
static void tree_copy(NODE *newsymb, NODE *tree, NODE *newtree); |
|
322
by john haque
Improve array interface. |
104 |
static long tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind); |
281.1.1
by john haque
Speed/memory performance improvements. |
105 |
static inline NODE **tree_find(NODE *tree, long k, int i); |
106 |
static void tree_info(NODE *tree, NODE *ndump, const char *aname); |
|
107 |
static size_t tree_kilobytes(NODE *tree); |
|
108 |
#ifdef ARRAYDEBUG
|
|
109 |
static void tree_print(NODE *tree, size_t bi, int indent_level); |
|
110 |
#endif
|
|
111 |
||
112 |
static inline NODE **leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base); |
|
113 |
static inline NODE **leaf_exists(NODE *array, long k); |
|
114 |
static void leaf_clear(NODE *array); |
|
115 |
static int leaf_remove(NODE *symbol, NODE *array, long k); |
|
116 |
static void leaf_copy(NODE *newsymb, NODE *array, NODE *newarray); |
|
322
by john haque
Improve array interface. |
117 |
static long leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind); |
281.1.1
by john haque
Speed/memory performance improvements. |
118 |
static void leaf_info(NODE *array, NODE *ndump, const char *aname); |
119 |
#ifdef ARRAYDEBUG
|
|
120 |
static void leaf_print(NODE *array, size_t bi, int indent_level); |
|
121 |
#endif
|
|
122 |
||
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
123 |
/* powers of 2 table upto 2^30 */
|
281.1.1
by john haque
Speed/memory performance improvements. |
124 |
static const long power_two_table[] = { |
125 |
1, 2, 4, 8, 16, 32, 64, |
|
126 |
128, 256, 512, 1024, 2048, 4096, |
|
127 |
8192, 16384, 32768, 65536, 131072, 262144, |
|
128 |
524288, 1048576, 2097152, 4194304, 8388608, 16777216, |
|
129 |
33554432, 67108864, 134217728, 268435456, 536870912, 1073741824 |
|
130 |
};
|
|
131 |
||
132 |
||
133 |
#define ISUINT(a, s) ((((s)->flags & NUMINT) != 0 || is_integer(a, s) != NULL) \
|
|
134 |
&& (s)->numbr >= 0)
|
|
135 |
||
136 |
/*
|
|
137 |
* To store 2^n integers, allocate top-level array of size n, elements
|
|
138 |
* of which are 1-Dimensional (leaf-array) of geometrically increasing
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
139 |
* size (power of 2).
|
281.1.1
by john haque
Speed/memory performance improvements. |
140 |
*
|
141 |
* [0] --> [ 0 ]
|
|
142 |
* [1] --> [ 1 ]
|
|
143 |
* |2| --> [ 2 | 3 ]
|
|
144 |
* |3| --> [ 4 | 5 | 6 | 7 ]
|
|
145 |
* |.|
|
|
146 |
* |k| --> [ 2^(k - 1)| ... | 2^k - 1 ]
|
|
147 |
* ...
|
|
148 |
*
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
149 |
* For a given integer n (> 0), the leaf-array is at 1 + floor(log2(n)).
|
281.1.1
by john haque
Speed/memory performance improvements. |
150 |
*
|
151 |
* The idea for the geometrically increasing array sizes is from:
|
|
152 |
* Fast Functional Lists, Hash-Lists, Deques and Variable Length Arrays.
|
|
153 |
* Bagwell, Phil (2002).
|
|
154 |
* http://infoscience.epfl.ch/record/64410/files/techlists.pdf
|
|
155 |
*
|
|
156 |
* Disadvantage:
|
|
157 |
* Worst case memory waste > 99% and will happen when each of the
|
|
158 |
* leaf arrays contains only a single element. Even with consecutive
|
|
159 |
* integers, memory waste can be as high as 50%.
|
|
160 |
*
|
|
161 |
* Solution: Hashed Array Trees (HATs).
|
|
162 |
*
|
|
163 |
*/
|
|
164 |
||
321
by john haque
Polish array handling code. |
165 |
/* cint_array_init --- array initialization routine */
|
281.1.1
by john haque
Speed/memory performance improvements. |
166 |
|
167 |
static NODE ** |
|
168 |
cint_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED) |
|
169 |
{
|
|
321
by john haque
Polish array handling code. |
170 |
if (symbol == NULL) { |
171 |
long newval; |
|
408.4.32
by Arnold D. Robbins
Fixes based on problems from a static checker. |
172 |
size_t nelems = (sizeof(power_two_table) / sizeof(power_two_table[0])); |
321
by john haque
Polish array handling code. |
173 |
|
174 |
/* check relevant environment variables */
|
|
175 |
if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT) |
|
176 |
NHAT = newval; |
|
408.4.32
by Arnold D. Robbins
Fixes based on problems from a static checker. |
177 |
/* don't allow overflow off the end of the table */
|
1253.1.1
by Andrew J. Schorr
Fix cint off-by-one array bounds overflow check for NHAT set in the environment. |
178 |
if (NHAT > nelems - 2) |
408.4.32
by Arnold D. Robbins
Fixes based on problems from a static checker. |
179 |
NHAT = nelems - 2; |
321
by john haque
Polish array handling code. |
180 |
THRESHOLD = power_two_table[NHAT + 1]; |
181 |
} else |
|
182 |
null_array(symbol); |
|
183 |
||
408.19.164
by Arnold D. Robbins
Replace `! NULL' with `& success_node' in array routines. |
184 |
return & success_node; |
281.1.1
by john haque
Speed/memory performance improvements. |
185 |
}
|
186 |
||
187 |
||
188 |
/* is_uinteger --- test if the subscript is an integer >= 0 */
|
|
189 |
||
190 |
NODE ** |
|
191 |
is_uinteger(NODE *symbol, NODE *subs) |
|
192 |
{
|
|
193 |
if (is_integer(symbol, subs) != NULL && subs->numbr >= 0) |
|
408.19.164
by Arnold D. Robbins
Replace `! NULL' with `& success_node' in array routines. |
194 |
return & success_node; |
281.1.1
by john haque
Speed/memory performance improvements. |
195 |
return NULL; |
196 |
}
|
|
197 |
||
198 |
||
199 |
/* cint_lookup --- Find the subscript in the array; Install it if it isn't there. */
|
|
200 |
||
201 |
static NODE ** |
|
202 |
cint_lookup(NODE *symbol, NODE *subs) |
|
203 |
{
|
|
204 |
NODE **lhs; |
|
205 |
long k; |
|
206 |
int h1 = -1, m, li; |
|
207 |
NODE *tn, *xn; |
|
208 |
long cint_size, capacity; |
|
209 |
||
210 |
k = -1; |
|
211 |
if (ISUINT(symbol, subs)) { |
|
212 |
k = subs->numbr; /* k >= 0 */ |
|
213 |
h1 = cint_hash(k); /* h1 >= NHAT */ |
|
214 |
if ((lhs = cint_find(symbol, k, h1)) != NULL) |
|
215 |
return lhs; |
|
216 |
}
|
|
217 |
xn = symbol->xarray; |
|
218 |
if (xn != NULL && (lhs = xn->aexists(xn, subs)) != NULL) |
|
219 |
return lhs; |
|
220 |
||
221 |
/* It's not there, install it */
|
|
222 |
||
223 |
if (k < 0) |
|
224 |
goto xinstall; |
|
225 |
||
226 |
m = h1 - 1; /* m >= (NHAT- 1) */ |
|
227 |
||
228 |
/* Estimate capacity upper bound.
|
|
229 |
* capacity upper bound = current capacity + leaf array size.
|
|
230 |
*/
|
|
231 |
li = m > NHAT ? m : NHAT; |
|
232 |
while (li >= NHAT) { |
|
233 |
/* leaf-array of a HAT */
|
|
234 |
li = (li + 1) / 2; |
|
235 |
}
|
|
236 |
capacity = symbol->array_capacity + power_two_table[li]; |
|
237 |
||
238 |
cint_size = (xn == NULL) ? symbol->table_size |
|
239 |
: (symbol->table_size - xn->table_size); |
|
240 |
assert(cint_size >= 0); |
|
241 |
if ((capacity - cint_size) > THRESHOLD) |
|
242 |
goto xinstall; |
|
243 |
||
244 |
if (symbol->nodes == NULL) { |
|
245 |
symbol->array_capacity = 0; |
|
246 |
assert(symbol->table_size == 0); |
|
247 |
||
248 |
/* nodes[0] .. nodes[NHAT- 1] not used */
|
|
731.9.23
by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro. |
249 |
ezalloc(symbol->nodes, NODE **, INT32_BIT * sizeof(NODE *), "cint_lookup"); |
281.1.1
by john haque
Speed/memory performance improvements. |
250 |
}
|
251 |
||
252 |
symbol->table_size++; /* one more element in array */ |
|
253 |
||
254 |
tn = symbol->nodes[h1]; |
|
255 |
if (tn == NULL) { |
|
256 |
tn = make_node(Node_array_tree); |
|
257 |
symbol->nodes[h1] = tn; |
|
258 |
}
|
|
259 |
||
260 |
if (m < NHAT) |
|
261 |
return tree_lookup(symbol, tn, k, NHAT, 0); |
|
262 |
return tree_lookup(symbol, tn, k, m, power_two_table[m]); |
|
263 |
||
264 |
xinstall: |
|
265 |
||
266 |
symbol->table_size++; |
|
267 |
if (xn == NULL) { |
|
268 |
xn = symbol->xarray = make_array(); |
|
269 |
xn->vname = symbol->vname; /* shallow copy */ |
|
270 |
||
321
by john haque
Polish array handling code. |
271 |
/*
|
272 |
* Avoid using assoc_lookup(xn, subs) which may lead
|
|
281.1.1
by john haque
Speed/memory performance improvements. |
273 |
* to infinite recursion.
|
274 |
*/
|
|
275 |
||
276 |
if (is_integer(xn, subs)) |
|
1051
by Andrew J. Schorr
Use a struct instead of an array to contain the array methods. |
277 |
xn->array_funcs = & int_array_func; |
281.1.1
by john haque
Speed/memory performance improvements. |
278 |
else
|
1051
by Andrew J. Schorr
Use a struct instead of an array to contain the array methods. |
279 |
xn->array_funcs = & str_array_func; |
281.1.1
by john haque
Speed/memory performance improvements. |
280 |
xn->flags |= XARRAY; |
281 |
}
|
|
282 |
return xn->alookup(xn, subs); |
|
283 |
}
|
|
284 |
||
285 |
||
286 |
/* cint_exists --- test whether an index is in the array or not. */
|
|
287 |
||
288 |
static NODE ** |
|
289 |
cint_exists(NODE *symbol, NODE *subs) |
|
290 |
{
|
|
291 |
NODE *xn; |
|
292 |
||
293 |
if (ISUINT(symbol, subs)) { |
|
294 |
long k = subs->numbr; |
|
295 |
NODE **lhs; |
|
296 |
if ((lhs = cint_find(symbol, k, cint_hash(k))) != NULL) |
|
297 |
return lhs; |
|
298 |
}
|
|
299 |
if ((xn = symbol->xarray) == NULL) |
|
300 |
return NULL; |
|
301 |
return xn->aexists(xn, subs); |
|
302 |
}
|
|
303 |
||
304 |
||
305 |
/* cint_clear --- flush all the values in symbol[] */
|
|
306 |
||
307 |
static NODE ** |
|
308 |
cint_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) |
|
309 |
{
|
|
310 |
size_t i; |
|
311 |
NODE *tn; |
|
312 |
||
313 |
assert(symbol->nodes != NULL); |
|
314 |
||
315 |
if (symbol->xarray != NULL) { |
|
316 |
NODE *xn = symbol->xarray; |
|
317 |
assoc_clear(xn); |
|
318 |
freenode(xn); |
|
319 |
symbol->xarray = NULL; |
|
320 |
}
|
|
321 |
||
322 |
for (i = NHAT; i < INT32_BIT; i++) { |
|
323 |
tn = symbol->nodes[i]; |
|
324 |
if (tn != NULL) { |
|
325 |
tree_clear(tn); |
|
326 |
freenode(tn); |
|
327 |
}
|
|
328 |
}
|
|
329 |
||
330 |
efree(symbol->nodes); |
|
321
by john haque
Polish array handling code. |
331 |
symbol->ainit(symbol, NULL); /* re-initialize symbol */ |
281.1.1
by john haque
Speed/memory performance improvements. |
332 |
return NULL; |
333 |
}
|
|
334 |
||
335 |
||
336 |
/* cint_remove --- remove an index from the array */
|
|
337 |
||
338 |
static NODE ** |
|
339 |
cint_remove(NODE *symbol, NODE *subs) |
|
340 |
{
|
|
341 |
long k; |
|
342 |
int h1; |
|
343 |
NODE *tn, *xn = symbol->xarray; |
|
344 |
||
291
by Arnold D. Robbins
The grand merge: dgawk and pgawk folded into gawk. |
345 |
if (symbol->table_size == 0) |
346 |
return NULL; |
|
347 |
||
281.1.1
by john haque
Speed/memory performance improvements. |
348 |
if (! ISUINT(symbol, subs)) |
349 |
goto xremove; |
|
350 |
||
291
by Arnold D. Robbins
The grand merge: dgawk and pgawk folded into gawk. |
351 |
assert(symbol->nodes != NULL); |
352 |
||
281.1.1
by john haque
Speed/memory performance improvements. |
353 |
k = subs->numbr; |
354 |
h1 = cint_hash(k); |
|
355 |
tn = symbol->nodes[h1]; |
|
356 |
if (tn == NULL || ! tree_remove(symbol, tn, k)) |
|
357 |
goto xremove; |
|
358 |
||
359 |
if (tn->table_size == 0) { |
|
360 |
freenode(tn); |
|
361 |
symbol->nodes[h1] = NULL; |
|
362 |
}
|
|
363 |
||
364 |
symbol->table_size--; |
|
365 |
||
366 |
if (xn == NULL && symbol->table_size == 0) { |
|
367 |
efree(symbol->nodes); |
|
321
by john haque
Polish array handling code. |
368 |
symbol->ainit(symbol, NULL); /* re-initialize array 'symbol' */ |
281.1.1
by john haque
Speed/memory performance improvements. |
369 |
} else if(xn != NULL && symbol->table_size == xn->table_size) { |
370 |
/* promote xn to symbol */
|
|
291
by Arnold D. Robbins
The grand merge: dgawk and pgawk folded into gawk. |
371 |
|
281.1.1
by john haque
Speed/memory performance improvements. |
372 |
xn->flags &= ~XARRAY; |
373 |
xn->parent_array = symbol->parent_array; |
|
374 |
efree(symbol->nodes); |
|
375 |
*symbol = *xn; |
|
376 |
freenode(xn); |
|
377 |
}
|
|
378 |
||
408.19.164
by Arnold D. Robbins
Replace `! NULL' with `& success_node' in array routines. |
379 |
return & success_node; |
281.1.1
by john haque
Speed/memory performance improvements. |
380 |
|
381 |
xremove: |
|
382 |
xn = symbol->xarray; |
|
383 |
if (xn == NULL || xn->aremove(xn, subs) == NULL) |
|
384 |
return NULL; |
|
385 |
if (xn->table_size == 0) { |
|
386 |
freenode(xn); |
|
387 |
symbol->xarray = NULL; |
|
388 |
}
|
|
389 |
symbol->table_size--; |
|
390 |
assert(symbol->table_size > 0); |
|
391 |
||
408.19.164
by Arnold D. Robbins
Replace `! NULL' with `& success_node' in array routines. |
392 |
return & success_node; |
281.1.1
by john haque
Speed/memory performance improvements. |
393 |
}
|
394 |
||
395 |
||
396 |
/* cint_copy --- duplicate input array "symbol" */
|
|
397 |
||
398 |
static NODE ** |
|
399 |
cint_copy(NODE *symbol, NODE *newsymb) |
|
400 |
{
|
|
401 |
NODE **old, **new; |
|
402 |
size_t i; |
|
403 |
||
404 |
assert(symbol->nodes != NULL); |
|
405 |
||
406 |
/* allocate new table */
|
|
731.9.23
by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro. |
407 |
ezalloc(new, NODE **, INT32_BIT * sizeof(NODE *), "cint_copy"); |
281.1.1
by john haque
Speed/memory performance improvements. |
408 |
|
409 |
old = symbol->nodes; |
|
410 |
for (i = NHAT; i < INT32_BIT; i++) { |
|
411 |
if (old[i] == NULL) |
|
412 |
continue; |
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
413 |
new[i] = make_node(Node_array_tree); |
281.1.1
by john haque
Speed/memory performance improvements. |
414 |
tree_copy(newsymb, old[i], new[i]); |
415 |
}
|
|
416 |
||
417 |
if (symbol->xarray != NULL) { |
|
418 |
NODE *xn, *n; |
|
419 |
xn = symbol->xarray; |
|
420 |
n = make_array(); |
|
421 |
n->vname = newsymb->vname; |
|
422 |
(void) xn->acopy(xn, n); |
|
423 |
newsymb->xarray = n; |
|
424 |
} else |
|
425 |
newsymb->xarray = NULL; |
|
426 |
||
427 |
newsymb->nodes = new; |
|
428 |
newsymb->table_size = symbol->table_size; |
|
429 |
newsymb->array_capacity = symbol->array_capacity; |
|
430 |
newsymb->flags = symbol->flags; |
|
431 |
||
432 |
return NULL; |
|
433 |
}
|
|
434 |
||
435 |
||
436 |
/* cint_list --- return a list of items */
|
|
437 |
||
438 |
static NODE** |
|
439 |
cint_list(NODE *symbol, NODE *t) |
|
440 |
{
|
|
441 |
NODE **list = NULL; |
|
442 |
NODE *tn, *xn; |
|
443 |
unsigned long k = 0, num_elems, list_size; |
|
444 |
size_t j, ja, jd; |
|
445 |
int elem_size = 1; |
|
322
by john haque
Improve array interface. |
446 |
assoc_kind_t assoc_kind; |
281.1.1
by john haque
Speed/memory performance improvements. |
447 |
|
448 |
num_elems = symbol->table_size; |
|
449 |
if (num_elems == 0) |
|
450 |
return NULL; |
|
322
by john haque
Improve array interface. |
451 |
assoc_kind = (assoc_kind_t) t->flags; |
452 |
if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE)) |
|
281.1.1
by john haque
Speed/memory performance improvements. |
453 |
num_elems = 1; |
454 |
||
322
by john haque
Improve array interface. |
455 |
if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) |
281.1.1
by john haque
Speed/memory performance improvements. |
456 |
elem_size = 2; |
457 |
list_size = num_elems * elem_size; |
|
458 |
||
459 |
if (symbol->xarray != NULL) { |
|
460 |
xn = symbol->xarray; |
|
461 |
list = xn->alist(xn, t); |
|
462 |
assert(list != NULL); |
|
322
by john haque
Improve array interface. |
463 |
assoc_kind &= ~(AASC|ADESC); |
464 |
t->flags = (unsigned int) assoc_kind; |
|
281.1.1
by john haque
Speed/memory performance improvements. |
465 |
if (num_elems == 1 || num_elems == xn->table_size) |
466 |
return list; |
|
467 |
erealloc(list, NODE **, list_size * sizeof(NODE *), "cint_list"); |
|
468 |
k = elem_size * xn->table_size; |
|
469 |
} else |
|
470 |
emalloc(list, NODE **, list_size * sizeof(NODE *), "cint_list"); |
|
471 |
||
322
by john haque
Improve array interface. |
472 |
if ((assoc_kind & AINUM) == 0) { |
473 |
/* not sorting by "index num" */
|
|
474 |
assoc_kind &= ~(AASC|ADESC); |
|
475 |
t->flags = (unsigned int) assoc_kind; |
|
476 |
}
|
|
281.1.1
by john haque
Speed/memory performance improvements. |
477 |
|
478 |
/* populate it with index in ascending or descending order */
|
|
479 |
||
480 |
for (ja = NHAT, jd = INT32_BIT - 1; ja < INT32_BIT && jd >= NHAT; ) { |
|
333
by Arnold D. Robbins
Merge branch 'master' into array-iface |
481 |
j = (assoc_kind & ADESC) != 0 ? jd-- : ja++; |
281.1.1
by john haque
Speed/memory performance improvements. |
482 |
tn = symbol->nodes[j]; |
483 |
if (tn == NULL) |
|
484 |
continue; |
|
322
by john haque
Improve array interface. |
485 |
k += tree_list(tn, list + k, assoc_kind); |
281.1.1
by john haque
Speed/memory performance improvements. |
486 |
if (k >= list_size) |
487 |
return list; |
|
488 |
}
|
|
489 |
return list; |
|
490 |
}
|
|
491 |
||
492 |
||
493 |
/* cint_dump --- dump array info */
|
|
494 |
||
495 |
static NODE ** |
|
496 |
cint_dump(NODE *symbol, NODE *ndump) |
|
497 |
{
|
|
498 |
NODE *tn, *xn = NULL; |
|
499 |
int indent_level; |
|
500 |
size_t i; |
|
281.1.2
by john haque
Add a test file, cleanup code and update doc. |
501 |
long cint_size = 0, xsize = 0; |
281.1.1
by john haque
Speed/memory performance improvements. |
502 |
AWKNUM kb = 0; |
503 |
extern AWKNUM int_kilobytes(NODE *symbol); |
|
281.1.2
by john haque
Add a test file, cleanup code and update doc. |
504 |
extern AWKNUM str_kilobytes(NODE *symbol); |
281.1.1
by john haque
Speed/memory performance improvements. |
505 |
|
506 |
indent_level = ndump->alevel; |
|
507 |
||
508 |
if (symbol->xarray != NULL) { |
|
509 |
xn = symbol->xarray; |
|
281.1.2
by john haque
Add a test file, cleanup code and update doc. |
510 |
xsize = xn->table_size; |
281.1.1
by john haque
Speed/memory performance improvements. |
511 |
}
|
281.1.2
by john haque
Add a test file, cleanup code and update doc. |
512 |
cint_size = symbol->table_size - xsize; |
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
513 |
|
281.1.1
by john haque
Speed/memory performance improvements. |
514 |
if ((symbol->flags & XARRAY) == 0) |
515 |
fprintf(output_fp, "%s `%s'\n", |
|
516 |
(symbol->parent_array == NULL) ? "array" : "sub-array", |
|
517 |
array_vname(symbol)); |
|
518 |
indent_level++; |
|
519 |
indent(indent_level); |
|
520 |
fprintf(output_fp, "array_func: cint_array_func\n"); |
|
521 |
if (symbol->flags != 0) { |
|
522 |
indent(indent_level); |
|
523 |
fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags)); |
|
524 |
}
|
|
525 |
indent(indent_level); |
|
526 |
fprintf(output_fp, "NHAT: %d\n", NHAT); |
|
527 |
indent(indent_level); |
|
528 |
fprintf(output_fp, "THRESHOLD: %ld\n", THRESHOLD); |
|
529 |
indent(indent_level); |
|
1379.1.24
by Arnold D. Robbins
Fix some printf formatting issues. |
530 |
fprintf(output_fp, "table_size: %lu (total), %ld (cint), %ld (int + str)\n", |
531 |
(unsigned long) symbol->table_size, cint_size, xsize); |
|
281.1.1
by john haque
Speed/memory performance improvements. |
532 |
indent(indent_level); |
533 |
fprintf(output_fp, "array_capacity: %lu\n", (unsigned long) symbol->array_capacity); |
|
534 |
indent(indent_level); |
|
535 |
fprintf(output_fp, "Load Factor: %.2g\n", (AWKNUM) cint_size / symbol->array_capacity); |
|
536 |
||
537 |
for (i = NHAT; i < INT32_BIT; i++) { |
|
538 |
tn = symbol->nodes[i]; |
|
539 |
if (tn == NULL) |
|
540 |
continue; |
|
541 |
/* Node_array_tree + HAT */
|
|
542 |
kb += (sizeof(NODE) + tree_kilobytes(tn)) / 1024.0; |
|
543 |
}
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
544 |
kb += (INT32_BIT * sizeof(NODE *)) / 1024.0; /* symbol->nodes */ |
281.1.1
by john haque
Speed/memory performance improvements. |
545 |
kb += (symbol->array_capacity * sizeof(NODE *)) / 1024.0; /* value nodes in Node_array_leaf(s) */ |
281.1.2
by john haque
Add a test file, cleanup code and update doc. |
546 |
if (xn != NULL) { |
1051
by Andrew J. Schorr
Use a struct instead of an array to contain the array methods. |
547 |
if (xn->array_funcs == & int_array_func) |
281.1.2
by john haque
Add a test file, cleanup code and update doc. |
548 |
kb += int_kilobytes(xn); |
549 |
else
|
|
550 |
kb += str_kilobytes(xn); |
|
551 |
}
|
|
552 |
||
281.1.1
by john haque
Speed/memory performance improvements. |
553 |
indent(indent_level); |
554 |
fprintf(output_fp, "memory: %.2g kB (total)\n", kb); |
|
555 |
||
556 |
/* dump elements */
|
|
557 |
||
558 |
if (ndump->adepth >= 0) { |
|
559 |
const char *aname; |
|
560 |
||
561 |
fprintf(output_fp, "\n"); |
|
562 |
aname = make_aname(symbol); |
|
563 |
for (i = NHAT; i < INT32_BIT; i++) { |
|
564 |
tn = symbol->nodes[i]; |
|
565 |
if (tn != NULL) |
|
566 |
tree_info(tn, ndump, aname); |
|
567 |
}
|
|
568 |
}
|
|
569 |
||
570 |
if (xn != NULL) { |
|
571 |
fprintf(output_fp, "\n"); |
|
572 |
xn->adump(xn, ndump); |
|
573 |
}
|
|
574 |
||
575 |
#ifdef ARRAYDEBUG
|
|
576 |
if (ndump->adepth < -999) |
|
577 |
cint_print(symbol); |
|
578 |
#endif
|
|
579 |
||
580 |
return NULL; |
|
581 |
}
|
|
582 |
||
583 |
||
584 |
/* cint_hash --- locate the HAT for a given number 'k' */
|
|
585 |
||
586 |
static inline int |
|
587 |
cint_hash(long k) |
|
588 |
{
|
|
589 |
uint32_t num, r, shift; |
|
590 |
||
591 |
assert(k >= 0); |
|
592 |
if (k == 0) |
|
593 |
return NHAT; |
|
594 |
num = k; |
|
595 |
||
596 |
/* Find the Floor(log base 2 of 32-bit integer) */
|
|
597 |
||
321
by john haque
Polish array handling code. |
598 |
/*
|
599 |
* Warren Jr., Henry S. (2002). Hacker's Delight.
|
|
281.1.1
by john haque
Speed/memory performance improvements. |
600 |
* Addison Wesley. pp. pp. 215. ISBN 978-0201914658.
|
601 |
*
|
|
602 |
* r = 0;
|
|
603 |
* if (num >= 1<<16) { num >>= 16; r += 16; }
|
|
604 |
* if (num >= 1<< 8) { num >>= 8; r += 8; }
|
|
605 |
* if (num >= 1<< 4) { num >>= 4; r += 4; }
|
|
606 |
* if (num >= 1<< 2) { num >>= 2; r += 2; }
|
|
607 |
* if (num >= 1<< 1) { r += 1; }
|
|
608 |
*/
|
|
609 |
||
610 |
||
321
by john haque
Polish array handling code. |
611 |
/*
|
612 |
* Slightly different code copied from:
|
|
281.1.1
by john haque
Speed/memory performance improvements. |
613 |
*
|
614 |
* http://www-graphics.stanford.edu/~seander/bithacks.html
|
|
615 |
* Bit Twiddling Hacks
|
|
616 |
* By Sean Eron Anderson
|
|
617 |
* seander@cs.stanford.edu
|
|
618 |
* Individually, the code snippets here are in the public domain
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
619 |
* (unless otherwise noted) --- feel free to use them however you please.
|
620 |
* The aggregate collection and descriptions are (C) 1997-2005
|
|
281.1.1
by john haque
Speed/memory performance improvements. |
621 |
* Sean Eron Anderson. The code and descriptions are distributed in the
|
622 |
* hope that they will be useful, but WITHOUT ANY WARRANTY and without
|
|
623 |
* even the implied warranty of merchantability or fitness for a particular
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
624 |
* purpose.
|
281.1.1
by john haque
Speed/memory performance improvements. |
625 |
*
|
626 |
*/
|
|
627 |
||
628 |
r = (num > 0xFFFF) << 4; num >>= r; |
|
629 |
shift = (num > 0xFF) << 3; num >>= shift; r |= shift; |
|
630 |
shift = (num > 0x0F) << 2; num >>= shift; r |= shift; |
|
631 |
shift = (num > 0x03) << 1; num >>= shift; r |= shift; |
|
632 |
r |= (num >> 1); |
|
633 |
||
634 |
/* We use a single HAT for 0 <= num < 2^NHAT */
|
|
635 |
if (r < NHAT) |
|
636 |
return NHAT; |
|
637 |
||
638 |
return (1 + r); |
|
639 |
}
|
|
640 |
||
641 |
||
642 |
/* cint_find --- locate the integer subscript */
|
|
643 |
||
644 |
static inline NODE ** |
|
645 |
cint_find(NODE *symbol, long k, int h1) |
|
646 |
{
|
|
647 |
NODE *tn; |
|
648 |
||
649 |
if (symbol->nodes == NULL || (tn = symbol->nodes[h1]) == NULL) |
|
650 |
return NULL; |
|
651 |
return tree_exists(tn, k); |
|
652 |
}
|
|
653 |
||
654 |
||
655 |
#ifdef ARRAYDEBUG
|
|
656 |
||
657 |
/* cint_print --- print structural info */
|
|
658 |
||
659 |
static void |
|
660 |
cint_print(NODE *symbol) |
|
661 |
{
|
|
662 |
NODE *tn; |
|
663 |
size_t i; |
|
664 |
||
665 |
fprintf(output_fp, "I[%4lu:%-4lu]\n", (unsigned long) INT32_BIT, |
|
666 |
(unsigned long) symbol->table_size); |
|
667 |
for (i = NHAT; i < INT32_BIT; i++) { |
|
668 |
tn = symbol->nodes[i]; |
|
669 |
if (tn == NULL) |
|
670 |
continue; |
|
671 |
tree_print(tn, i, 1); |
|
672 |
}
|
|
673 |
}
|
|
674 |
||
675 |
#endif
|
|
676 |
||
677 |
||
678 |
/*------------------------ Hashed Array Trees -----------------------------*/
|
|
679 |
||
680 |
/*
|
|
681 |
* HATs: Hashed Array Trees
|
|
682 |
* Fast variable-length arrays
|
|
683 |
* Edward Sitarski
|
|
684 |
* http://www.drdobbs.com/architecture-and-design/184409965
|
|
685 |
*
|
|
686 |
* HAT has a top-level array containing a power of two
|
|
687 |
* number of leaf arrays. All leaf arrays are the same size as the
|
|
688 |
* top-level array. A full HAT can hold n^2 elements,
|
|
689 |
* where n (some power of 2) is the size of each leaf array.
|
|
690 |
* [i/n][i & (n - 1)] locates the `i th' element in a HAT.
|
|
691 |
*
|
|
692 |
*/
|
|
693 |
||
694 |
/*
|
|
695 |
* A half HAT is defined here as a HAT with a top-level array of size n^2/2
|
|
696 |
* and holds the first n^2/2 elements.
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
697 |
*
|
281.1.1
by john haque
Speed/memory performance improvements. |
698 |
* 1. 2^8 elements can be stored in a full HAT of size 2^4.
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
699 |
* 2. 2^9 elements can be stored in a half HAT of size 2^5.
|
281.1.1
by john haque
Speed/memory performance improvements. |
700 |
* 3. When the number of elements is some power of 2, it
|
701 |
* can be stored in a full or a half HAT.
|
|
702 |
* 4. When the number of elements is some power of 2, it
|
|
703 |
* can be stored in a HAT (full or half) with HATs as leaf elements
|
|
704 |
* (full or half), and so on (e.g. 2^8 elements in a HAT of size 2^4 (top-level
|
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
705 |
* array dimension) with each leaf array being a HAT of size 2^2).
|
281.1.1
by john haque
Speed/memory performance improvements. |
706 |
*
|
707 |
* IMPLEMENTATION DETAILS:
|
|
708 |
* 1. A HAT of 2^12 elements needs 2^6 house-keeping NODEs
|
|
709 |
* of Node_array_leaf.
|
|
710 |
*
|
|
711 |
* 2. A HAT of HATS of 2^12 elements needs
|
|
712 |
* 2^6 * (1 Node_array_tree + 2^3 Node_array_leaf)
|
|
713 |
* ~ 2^9 house-keeping NODEs.
|
|
714 |
*
|
|
715 |
* 3. When a leaf array (or leaf HAT) becomes empty, the memory
|
|
716 |
* is deallocated, and when there is no leaf array (or leaf HAT) left,
|
|
717 |
* the HAT is deleted.
|
|
718 |
*
|
|
719 |
* 4. A HAT stores the base (first) element, and locates the leaf array/HAT
|
|
720 |
* for the `i th' element using integer division
|
|
721 |
* (i - base)/n where n is the size of the top-level array.
|
|
722 |
*
|
|
723 |
*/
|
|
724 |
||
725 |
/* make_node --- initialize a NODE */
|
|
726 |
||
727 |
static inline NODE * |
|
728 |
make_node(NODETYPE type) |
|
729 |
{
|
|
730 |
NODE *n; |
|
731 |
getnode(n); |
|
732 |
memset(n, '\0', sizeof(NODE)); |
|
733 |
n->type = type; |
|
734 |
return n; |
|
735 |
}
|
|
736 |
||
737 |
||
738 |
/* tree_lookup --- Find an integer subscript in a HAT; Install it if it isn't there */
|
|
739 |
||
740 |
static NODE ** |
|
741 |
tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base) |
|
742 |
{
|
|
743 |
NODE **lhs; |
|
744 |
NODE *tn; |
|
745 |
int i, n; |
|
746 |
size_t size; |
|
747 |
long num = k; |
|
748 |
||
749 |
/*
|
|
750 |
* HAT size (size of Top & Leaf array) = 2^n
|
|
751 |
* where n = Floor ((m + 1)/2). For an odd value of m,
|
|
752 |
* only the first half of the HAT is needed.
|
|
753 |
*/
|
|
754 |
||
755 |
n = (m + 1) / 2; |
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
756 |
|
281.1.1
by john haque
Speed/memory performance improvements. |
757 |
if (tree->table_size == 0) { |
758 |
size_t actual_size; |
|
759 |
NODE **table; |
|
760 |
||
761 |
assert(tree->nodes == NULL); |
|
762 |
||
763 |
/* initialize top-level array */
|
|
764 |
size = actual_size = power_two_table[n]; |
|
765 |
tree->array_base = base; |
|
766 |
tree->array_size = size; |
|
767 |
tree->table_size = 0; /* # of elements in the array */ |
|
768 |
if (n > m/2) { |
|
769 |
/* only first half of the array used */
|
|
770 |
actual_size /= 2; |
|
771 |
tree->flags |= HALFHAT; |
|
772 |
}
|
|
731.9.23
by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro. |
773 |
ezalloc(table, NODE **, actual_size * sizeof(NODE *), "tree_lookup"); |
281.1.1
by john haque
Speed/memory performance improvements. |
774 |
tree->nodes = table; |
775 |
} else |
|
776 |
size = tree->array_size; |
|
777 |
||
778 |
num -= tree->array_base; |
|
779 |
i = num / size; /* top-level array index */ |
|
780 |
assert(i >= 0); |
|
781 |
||
782 |
if ((lhs = tree_find(tree, k, i)) != NULL) |
|
783 |
return lhs; |
|
784 |
||
785 |
/* It's not there, install it */
|
|
786 |
||
787 |
tree->table_size++; |
|
788 |
base += (size * i); |
|
789 |
tn = tree->nodes[i]; |
|
790 |
if (n > NHAT) { |
|
791 |
if (tn == NULL) |
|
792 |
tn = tree->nodes[i] = make_node(Node_array_tree); |
|
793 |
return tree_lookup(symbol, tn, k, n, base); |
|
794 |
} else { |
|
795 |
if (tn == NULL) |
|
796 |
tn = tree->nodes[i] = make_node(Node_array_leaf); |
|
797 |
return leaf_lookup(symbol, tn, k, size, base); |
|
798 |
}
|
|
799 |
}
|
|
800 |
||
801 |
||
802 |
/* tree_exists --- test whether integer subscript `k' exists or not */
|
|
803 |
||
804 |
static NODE ** |
|
805 |
tree_exists(NODE *tree, long k) |
|
806 |
{
|
|
807 |
int i; |
|
808 |
NODE *tn; |
|
809 |
||
810 |
i = (k - tree->array_base) / tree->array_size; |
|
811 |
assert(i >= 0); |
|
812 |
tn = tree->nodes[i]; |
|
813 |
if (tn == NULL) |
|
814 |
return NULL; |
|
815 |
if (tn->type == Node_array_tree) |
|
816 |
return tree_exists(tn, k); |
|
817 |
return leaf_exists(tn, k); |
|
818 |
}
|
|
819 |
||
820 |
/* tree_clear --- flush all the values */
|
|
821 |
||
822 |
static void |
|
823 |
tree_clear(NODE *tree) |
|
824 |
{
|
|
825 |
NODE *tn; |
|
826 |
size_t j, hsize; |
|
827 |
||
828 |
hsize = tree->array_size; |
|
829 |
if ((tree->flags & HALFHAT) != 0) |
|
830 |
hsize /= 2; |
|
831 |
||
832 |
for (j = 0; j < hsize; j++) { |
|
833 |
tn = tree->nodes[j]; |
|
834 |
if (tn == NULL) |
|
835 |
continue; |
|
836 |
if (tn->type == Node_array_tree) |
|
837 |
tree_clear(tn); |
|
838 |
else
|
|
839 |
leaf_clear(tn); |
|
840 |
freenode(tn); |
|
841 |
}
|
|
842 |
||
843 |
efree(tree->nodes); |
|
844 |
memset(tree, '\0', sizeof(NODE)); |
|
845 |
tree->type = Node_array_tree; |
|
846 |
}
|
|
847 |
||
848 |
||
849 |
/* tree_remove --- If the integer subscript is in the HAT, remove it */
|
|
850 |
||
851 |
static int |
|
852 |
tree_remove(NODE *symbol, NODE *tree, long k) |
|
853 |
{
|
|
854 |
int i; |
|
855 |
NODE *tn; |
|
856 |
||
857 |
i = (k - tree->array_base) / tree->array_size; |
|
858 |
assert(i >= 0); |
|
859 |
tn = tree->nodes[i]; |
|
860 |
if (tn == NULL) |
|
319.1.9
by Arnold D. Robbins
Move to use of bool type, true, false, everywhere. |
861 |
return false; |
281.1.1
by john haque
Speed/memory performance improvements. |
862 |
|
863 |
if (tn->type == Node_array_tree |
|
864 |
&& ! tree_remove(symbol, tn, k)) |
|
319.1.9
by Arnold D. Robbins
Move to use of bool type, true, false, everywhere. |
865 |
return false; |
281.1.1
by john haque
Speed/memory performance improvements. |
866 |
else if (tn->type == Node_array_leaf |
867 |
&& ! leaf_remove(symbol, tn, k)) |
|
319.1.9
by Arnold D. Robbins
Move to use of bool type, true, false, everywhere. |
868 |
return false; |
281.1.1
by john haque
Speed/memory performance improvements. |
869 |
|
870 |
if (tn->table_size == 0) { |
|
871 |
freenode(tn); |
|
872 |
tree->nodes[i] = NULL; |
|
873 |
}
|
|
874 |
||
875 |
/* one less item in array */
|
|
876 |
if (--tree->table_size == 0) { |
|
877 |
efree(tree->nodes); |
|
878 |
memset(tree, '\0', sizeof(NODE)); |
|
879 |
tree->type = Node_array_tree; |
|
880 |
}
|
|
319.1.9
by Arnold D. Robbins
Move to use of bool type, true, false, everywhere. |
881 |
return true; |
281.1.1
by john haque
Speed/memory performance improvements. |
882 |
}
|
883 |
||
884 |
||
885 |
/* tree_find --- locate an interger subscript in the HAT */
|
|
886 |
||
887 |
static inline NODE ** |
|
888 |
tree_find(NODE *tree, long k, int i) |
|
889 |
{
|
|
890 |
NODE *tn; |
|
891 |
||
892 |
assert(tree->nodes != NULL); |
|
893 |
tn = tree->nodes[i]; |
|
894 |
if (tn != NULL) { |
|
895 |
if (tn->type == Node_array_tree) |
|
896 |
return tree_exists(tn, k); |
|
897 |
return leaf_exists(tn, k); |
|
898 |
}
|
|
899 |
return NULL; |
|
900 |
}
|
|
901 |
||
902 |
||
903 |
/* tree_list --- return a list of items in the HAT */
|
|
904 |
||
905 |
static long |
|
322
by john haque
Improve array interface. |
906 |
tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind) |
281.1.1
by john haque
Speed/memory performance improvements. |
907 |
{
|
908 |
NODE *tn; |
|
909 |
size_t j, cj, hsize; |
|
910 |
long k = 0; |
|
911 |
||
912 |
assert(list != NULL); |
|
913 |
||
914 |
hsize = tree->array_size; |
|
915 |
if ((tree->flags & HALFHAT) != 0) |
|
916 |
hsize /= 2; |
|
917 |
||
918 |
for (j = 0; j < hsize; j++) { |
|
333
by Arnold D. Robbins
Merge branch 'master' into array-iface |
919 |
cj = (assoc_kind & ADESC) != 0 ? (hsize - 1 - j) : j; |
281.1.1
by john haque
Speed/memory performance improvements. |
920 |
tn = tree->nodes[cj]; |
921 |
if (tn == NULL) |
|
922 |
continue; |
|
923 |
if (tn->type == Node_array_tree) |
|
322
by john haque
Improve array interface. |
924 |
k += tree_list(tn, list + k, assoc_kind); |
281.1.1
by john haque
Speed/memory performance improvements. |
925 |
else
|
322
by john haque
Improve array interface. |
926 |
k += leaf_list(tn, list + k, assoc_kind); |
927 |
if ((assoc_kind & ADELETE) != 0 && k >= 1) |
|
281.1.1
by john haque
Speed/memory performance improvements. |
928 |
return k; |
929 |
}
|
|
930 |
return k; |
|
931 |
}
|
|
932 |
||
933 |
||
934 |
/* tree_copy --- duplicate a HAT */
|
|
935 |
||
936 |
static void |
|
937 |
tree_copy(NODE *newsymb, NODE *tree, NODE *newtree) |
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
938 |
{
|
281.1.1
by john haque
Speed/memory performance improvements. |
939 |
NODE **old, **new; |
940 |
size_t j, hsize; |
|
941 |
||
942 |
hsize = tree->array_size; |
|
943 |
if ((tree->flags & HALFHAT) != 0) |
|
944 |
hsize /= 2; |
|
945 |
||
731.9.23
by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro. |
946 |
ezalloc(new, NODE **, hsize * sizeof(NODE *), "tree_copy"); |
281.1.1
by john haque
Speed/memory performance improvements. |
947 |
newtree->nodes = new; |
948 |
newtree->array_base = tree->array_base; |
|
949 |
newtree->array_size = tree->array_size; |
|
950 |
newtree->table_size = tree->table_size; |
|
951 |
newtree->flags = tree->flags; |
|
952 |
||
953 |
old = tree->nodes; |
|
954 |
for (j = 0; j < hsize; j++) { |
|
955 |
if (old[j] == NULL) |
|
956 |
continue; |
|
957 |
if (old[j]->type == Node_array_tree) { |
|
958 |
new[j] = make_node(Node_array_tree); |
|
959 |
tree_copy(newsymb, old[j], new[j]); |
|
960 |
} else { |
|
961 |
new[j] = make_node(Node_array_leaf); |
|
962 |
leaf_copy(newsymb, old[j], new[j]); |
|
963 |
}
|
|
964 |
}
|
|
965 |
}
|
|
966 |
||
967 |
||
968 |
/* tree_info --- print index, value info */
|
|
969 |
||
970 |
static void |
|
971 |
tree_info(NODE *tree, NODE *ndump, const char *aname) |
|
972 |
{
|
|
973 |
NODE *tn; |
|
974 |
size_t j, hsize; |
|
975 |
||
976 |
hsize = tree->array_size; |
|
977 |
if ((tree->flags & HALFHAT) != 0) |
|
978 |
hsize /= 2; |
|
979 |
||
980 |
for (j = 0; j < hsize; j++) { |
|
981 |
tn = tree->nodes[j]; |
|
982 |
if (tn == NULL) |
|
983 |
continue; |
|
984 |
if (tn->type == Node_array_tree) |
|
985 |
tree_info(tn, ndump, aname); |
|
986 |
else
|
|
987 |
leaf_info(tn, ndump, aname); |
|
988 |
}
|
|
989 |
}
|
|
990 |
||
991 |
||
992 |
/* tree_kilobytes --- calculate memory consumption of a HAT */
|
|
993 |
||
994 |
static size_t |
|
995 |
tree_kilobytes(NODE *tree) |
|
996 |
{
|
|
997 |
NODE *tn; |
|
998 |
size_t j, hsize; |
|
999 |
size_t sz = 0; |
|
1000 |
||
1001 |
hsize = tree->array_size; |
|
1002 |
if ((tree->flags & HALFHAT) != 0) |
|
1003 |
hsize /= 2; |
|
1004 |
for (j = 0; j < hsize; j++) { |
|
1005 |
tn = tree->nodes[j]; |
|
1006 |
if (tn == NULL) |
|
1007 |
continue; |
|
1008 |
sz += sizeof(NODE); /* Node_array_tree or Node_array_leaf */ |
|
1009 |
if (tn->type == Node_array_tree) |
|
1010 |
sz += tree_kilobytes(tn); |
|
1011 |
}
|
|
1012 |
sz += hsize * sizeof(NODE *); /* tree->nodes */ |
|
1013 |
return sz; |
|
1014 |
}
|
|
1015 |
||
1016 |
#ifdef ARRAYDEBUG
|
|
1017 |
||
1018 |
/* tree_print --- print the HAT structures */
|
|
1019 |
||
1020 |
static void |
|
1021 |
tree_print(NODE *tree, size_t bi, int indent_level) |
|
1022 |
{
|
|
1023 |
NODE *tn; |
|
1024 |
size_t j, hsize; |
|
1025 |
||
1026 |
indent(indent_level); |
|
1027 |
||
1028 |
hsize = tree->array_size; |
|
1029 |
if ((tree->flags & HALFHAT) != 0) |
|
1030 |
hsize /= 2; |
|
319.2.21
by Arnold D. Robbins
Minor fixes for printf compile warnings. |
1031 |
fprintf(output_fp, "%4lu:%s[%4lu:%-4lu]\n", |
1032 |
(unsigned long) bi, |
|
319.1.140
by Arnold D. Robbins
Make bitflag checking consistent everywhere. |
1033 |
(tree->flags & HALFHAT) != 0 ? "HH" : "H", |
281.1.1
by john haque
Speed/memory performance improvements. |
1034 |
(unsigned long) hsize, (unsigned long) tree->table_size); |
1035 |
||
1036 |
for (j = 0; j < hsize; j++) { |
|
1037 |
tn = tree->nodes[j]; |
|
1038 |
if (tn == NULL) |
|
1039 |
continue; |
|
1040 |
if (tn->type == Node_array_tree) |
|
1041 |
tree_print(tn, j, indent_level + 1); |
|
1042 |
else
|
|
1043 |
leaf_print(tn, j, indent_level + 1); |
|
1044 |
}
|
|
1045 |
}
|
|
1046 |
#endif
|
|
1047 |
||
1048 |
/*--------------------- leaf (linear 1-D) array --------------------*/
|
|
1049 |
||
321
by john haque
Polish array handling code. |
1050 |
/*
|
1051 |
* leaf_lookup --- find an integer subscript in the array; Install it if
|
|
1052 |
* it isn't there.
|
|
1053 |
*/
|
|
281.1.1
by john haque
Speed/memory performance improvements. |
1054 |
|
1055 |
static inline NODE ** |
|
1056 |
leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base) |
|
1057 |
{
|
|
1058 |
NODE **lhs; |
|
1059 |
||
1060 |
if (array->nodes == NULL) { |
|
1061 |
array->table_size = 0; /* sanity */ |
|
1062 |
array->array_size = size; |
|
1063 |
array->array_base = base; |
|
731.9.23
by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro. |
1064 |
ezalloc(array->nodes, NODE **, size * sizeof(NODE *), "leaf_lookup"); |
281.1.1
by john haque
Speed/memory performance improvements. |
1065 |
symbol->array_capacity += size; |
1066 |
}
|
|
1067 |
||
1068 |
lhs = array->nodes + (k - base); /* leaf element */ |
|
1069 |
if (*lhs == NULL) { |
|
1070 |
array->table_size++; /* one more element in leaf array */ |
|
1720.1.27
by Arnold D. Robbins
Squashed commit of the following: |
1071 |
*lhs = new_array_element(); |
281.1.1
by john haque
Speed/memory performance improvements. |
1072 |
}
|
1073 |
return lhs; |
|
1074 |
}
|
|
1075 |
||
1076 |
||
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
1077 |
/* leaf_exists --- check if the array contains an integer subscript */
|
281.1.1
by john haque
Speed/memory performance improvements. |
1078 |
|
1079 |
static inline NODE ** |
|
1080 |
leaf_exists(NODE *array, long k) |
|
1081 |
{
|
|
1082 |
NODE **lhs; |
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
1083 |
lhs = array->nodes + (k - array->array_base); |
281.1.1
by john haque
Speed/memory performance improvements. |
1084 |
return (*lhs != NULL) ? lhs : NULL; |
1085 |
}
|
|
1086 |
||
1087 |
||
1088 |
/* leaf_clear --- flush all values in the array */
|
|
1089 |
||
1090 |
static void |
|
1091 |
leaf_clear(NODE *array) |
|
1092 |
{
|
|
1093 |
long i, size = array->array_size; |
|
1094 |
NODE *r; |
|
1095 |
||
1096 |
for (i = 0; i < size; i++) { |
|
1097 |
r = array->nodes[i]; |
|
1098 |
if (r == NULL) |
|
1099 |
continue; |
|
1100 |
if (r->type == Node_var_array) { |
|
1101 |
assoc_clear(r); /* recursively clear all sub-arrays */ |
|
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
1102 |
efree(r->vname); |
281.1.1
by john haque
Speed/memory performance improvements. |
1103 |
freenode(r); |
1104 |
} else |
|
1105 |
unref(r); |
|
1106 |
}
|
|
1107 |
efree(array->nodes); |
|
1108 |
array->nodes = NULL; |
|
1109 |
array->array_size = array->table_size = 0; |
|
1110 |
}
|
|
1111 |
||
1112 |
||
1113 |
/* leaf_remove --- remove an integer subscript from the array */
|
|
1114 |
||
1115 |
static int |
|
1116 |
leaf_remove(NODE *symbol, NODE *array, long k) |
|
1117 |
{
|
|
1118 |
NODE **lhs; |
|
1119 |
||
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
1120 |
lhs = array->nodes + (k - array->array_base); |
281.1.1
by john haque
Speed/memory performance improvements. |
1121 |
if (*lhs == NULL) |
319.1.9
by Arnold D. Robbins
Move to use of bool type, true, false, everywhere. |
1122 |
return false; |
281.1.1
by john haque
Speed/memory performance improvements. |
1123 |
*lhs = NULL; |
1124 |
if (--array->table_size == 0) { |
|
1125 |
efree(array->nodes); |
|
1126 |
array->nodes = NULL; |
|
1127 |
symbol->array_capacity -= array->array_size; |
|
1128 |
array->array_size = 0; /* sanity */ |
|
1129 |
}
|
|
319.1.9
by Arnold D. Robbins
Move to use of bool type, true, false, everywhere. |
1130 |
return true; |
281.1.1
by john haque
Speed/memory performance improvements. |
1131 |
}
|
1132 |
||
1133 |
||
1134 |
/* leaf_copy --- duplicate a leaf array */
|
|
1135 |
||
1136 |
static void |
|
1137 |
leaf_copy(NODE *newsymb, NODE *array, NODE *newarray) |
|
1138 |
{
|
|
1139 |
NODE **old, **new; |
|
1140 |
long size, i; |
|
1141 |
||
1142 |
size = array->array_size; |
|
731.9.23
by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro. |
1143 |
ezalloc(new, NODE **, size * sizeof(NODE *), "leaf_copy"); |
281.1.1
by john haque
Speed/memory performance improvements. |
1144 |
newarray->nodes = new; |
1145 |
newarray->array_size = size; |
|
1146 |
newarray->array_base = array->array_base; |
|
1147 |
newarray->flags = array->flags; |
|
1148 |
newarray->table_size = array->table_size; |
|
1149 |
||
1150 |
old = array->nodes; |
|
1151 |
for (i = 0; i < size; i++) { |
|
1152 |
if (old[i] == NULL) |
|
1153 |
continue; |
|
1154 |
if (old[i]->type == Node_val) |
|
1155 |
new[i] = dupnode(old[i]); |
|
1156 |
else { |
|
1157 |
NODE *r; |
|
1158 |
r = make_array(); |
|
1159 |
r->vname = estrdup(old[i]->vname, strlen(old[i]->vname)); |
|
1160 |
r->parent_array = newsymb; |
|
1161 |
new[i] = assoc_copy(old[i], r); |
|
1162 |
}
|
|
1163 |
}
|
|
1164 |
}
|
|
1165 |
||
1166 |
||
1167 |
/* leaf_list --- return a list of items */
|
|
1168 |
||
1169 |
static long |
|
322
by john haque
Improve array interface. |
1170 |
leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind) |
281.1.1
by john haque
Speed/memory performance improvements. |
1171 |
{
|
1172 |
NODE *r, *subs; |
|
1173 |
long num, i, ci, k = 0; |
|
1174 |
long size = array->array_size; |
|
1175 |
static char buf[100]; |
|
1176 |
||
1177 |
for (i = 0; i < size; i++) { |
|
333
by Arnold D. Robbins
Merge branch 'master' into array-iface |
1178 |
ci = (assoc_kind & ADESC) != 0 ? (size - 1 - i) : i; |
281.1.1
by john haque
Speed/memory performance improvements. |
1179 |
r = array->nodes[ci]; |
1180 |
if (r == NULL) |
|
1181 |
continue; |
|
1182 |
||
1183 |
/* index */
|
|
1184 |
num = array->array_base + ci; |
|
333
by Arnold D. Robbins
Merge branch 'master' into array-iface |
1185 |
if ((assoc_kind & AISTR) != 0) { |
408.26.83
by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII. |
1186 |
sprintf(buf, "%ld", num); |
281.1.1
by john haque
Speed/memory performance improvements. |
1187 |
subs = make_string(buf, strlen(buf)); |
1188 |
subs->numbr = num; |
|
1189 |
subs->flags |= (NUMCUR|NUMINT); |
|
1190 |
} else { |
|
1191 |
subs = make_number((AWKNUM) num); |
|
1192 |
subs->flags |= (INTIND|NUMINT); |
|
1193 |
}
|
|
1194 |
list[k++] = subs; |
|
1195 |
||
1196 |
/* value */
|
|
333
by Arnold D. Robbins
Merge branch 'master' into array-iface |
1197 |
if ((assoc_kind & AVALUE) != 0) { |
281.1.1
by john haque
Speed/memory performance improvements. |
1198 |
if (r->type == Node_val) { |
322
by john haque
Improve array interface. |
1199 |
if ((assoc_kind & AVNUM) != 0) |
281.1.1
by john haque
Speed/memory performance improvements. |
1200 |
(void) force_number(r); |
322
by john haque
Improve array interface. |
1201 |
else if ((assoc_kind & AVSTR) != 0) |
281.1.1
by john haque
Speed/memory performance improvements. |
1202 |
r = force_string(r); |
1203 |
}
|
|
1204 |
list[k++] = r; |
|
1205 |
}
|
|
322
by john haque
Improve array interface. |
1206 |
if ((assoc_kind & ADELETE) != 0 && k >= 1) |
281.1.1
by john haque
Speed/memory performance improvements. |
1207 |
return k; |
1208 |
}
|
|
1209 |
||
1210 |
return k; |
|
1211 |
}
|
|
1212 |
||
1213 |
||
1214 |
/* leaf_info --- print index, value info */
|
|
1215 |
||
1216 |
static void |
|
1217 |
leaf_info(NODE *array, NODE *ndump, const char *aname) |
|
1218 |
{
|
|
1219 |
NODE *subs, *val; |
|
1220 |
size_t i, size; |
|
1221 |
||
1222 |
size = array->array_size; |
|
1223 |
||
1224 |
subs = make_number((AWKNUM) 0.0); |
|
1225 |
subs->flags |= (INTIND|NUMINT); |
|
1226 |
for (i = 0; i < size; i++) { |
|
1227 |
val = array->nodes[i]; |
|
1228 |
if (val == NULL) |
|
1229 |
continue; |
|
1230 |
subs->numbr = array->array_base + i; |
|
1231 |
assoc_info(subs, val, ndump, aname); |
|
1232 |
}
|
|
1233 |
unref(subs); |
|
1234 |
}
|
|
1235 |
||
1236 |
#ifdef ARRAYDEBUG
|
|
1237 |
||
1238 |
/* leaf_print --- print the leaf-array structure */
|
|
1239 |
||
1240 |
||
1241 |
static void |
|
1242 |
leaf_print(NODE *array, size_t bi, int indent_level) |
|
1243 |
{
|
|
1244 |
indent(indent_level); |
|
319.2.21
by Arnold D. Robbins
Minor fixes for printf compile warnings. |
1245 |
fprintf(output_fp, "%4lu:L[%4lu:%-4lu]\n", |
1246 |
(unsigned long) bi, |
|
281.1.1
by john haque
Speed/memory performance improvements. |
1247 |
(unsigned long) array->array_size, |
1248 |
(unsigned long) array->table_size); |
|
1249 |
}
|
|
1250 |
#endif
|
|
1146.1.22
by Arnold D. Robbins
Improve sandbox mode. |
1251 |
|
1252 |
static NODE *argv_shadow_array = NULL; |
|
1253 |
||
1254 |
/* argv_store --- post assign function for ARGV in sandbox mode */
|
|
1255 |
||
1256 |
static NODE ** |
|
1257 |
argv_store(NODE *symbol, NODE *subs) |
|
1258 |
{
|
|
1259 |
NODE **val = cint_exists(symbol, subs); |
|
1260 |
NODE *newval = *val; |
|
1261 |
char *cp; |
|
1262 |
||
1263 |
if (newval->stlen == 0) // empty strings in ARGV are OK |
|
1264 |
return val; |
|
1265 |
||
1266 |
if ((cp = strchr(newval->stptr, '=')) == NULL) { |
|
1267 |
if (! in_array(argv_shadow_array, newval)) |
|
1268 |
fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"), |
|
1269 |
(int) newval->stlen, newval->stptr); |
|
1270 |
} else { |
|
1271 |
// check if it's a valid variable assignment
|
|
1272 |
bool badvar = false; |
|
1273 |
char *arg = newval->stptr; |
|
1274 |
char *cp2; |
|
1275 |
||
1276 |
*cp = '\0'; // temporarily |
|
1277 |
||
1278 |
if (! is_letter((unsigned char) arg[0])) |
|
1279 |
badvar = true; |
|
1280 |
else
|
|
1281 |
for (cp2 = arg+1; *cp2; cp2++) |
|
1282 |
if (! is_identchar((unsigned char) *cp2) && *cp2 != ':') { |
|
1283 |
badvar = true; |
|
1284 |
break; |
|
1285 |
}
|
|
1286 |
||
1287 |
// further checks
|
|
1288 |
if (! badvar) { |
|
1289 |
char *cp = strchr(arg, ':'); |
|
1290 |
if (cp && (cp[1] != ':' || strchr(cp + 2, ':') != NULL)) |
|
1291 |
badvar = true; |
|
1292 |
}
|
|
1293 |
*cp = '='; // restore the '=' |
|
1294 |
||
1295 |
if (badvar && ! in_array(argv_shadow_array, newval)) |
|
1296 |
fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"), |
|
1297 |
(int) newval->stlen, newval->stptr); |
|
1298 |
||
1299 |
// otherwise, badvar is false, let it through as variable assignment
|
|
1300 |
}
|
|
1301 |
return val; |
|
1302 |
}
|
|
1303 |
||
1304 |
/* init_argv_array --- set up the pointers for ARGV in sandbox mode. A bit hacky. */
|
|
1305 |
||
1306 |
void
|
|
1307 |
init_argv_array(NODE *argv_node, NODE *shadow_node) |
|
1308 |
{
|
|
1309 |
/* If POSIX simply don't reset the vtable and things work as before */
|
|
1310 |
if (! do_sandbox) |
|
1311 |
return; |
|
1312 |
||
1313 |
argv_node->array_funcs = & argv_array_func; |
|
1314 |
argv_shadow_array = shadow_node; |
|
1315 |
}
|