~vcs-imports/gawk/master

281.1.1 by john haque
Speed/memory performance improvements.
1
/*
2
 * cint_array.c - routines for arrays of (mostly) consecutive positive integer indices.
3
 */
4
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
5
/*
1720.1.17 by Arnold D. Robbins
Update copyright year in a bunch of files.
6
 * Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2019-2022,
408.19.184 by Arnold D. Robbins
Changes toward release and test tarball.
7
 * the Free Software Foundation, Inc.
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
8
 *
281.1.1 by john haque
Speed/memory performance improvements.
9
 * This file is part of GAWK, the GNU implementation of the
10
 * AWK Programming Language.
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
11
 *
281.1.1 by john haque
Speed/memory performance improvements.
12
 * GAWK is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 3 of the License, or
15
 * (at your option) any later version.
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
16
 *
281.1.1 by john haque
Speed/memory performance improvements.
17
 * GAWK is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
21
 *
281.1.1 by john haque
Speed/memory performance improvements.
22
 * You should have received a copy of the GNU General Public License
23
 * along with this program; if not, write to the Free Software
24
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
25
 */
26
27
#include "awk.h"
28
319.1.116 by Arnold D. Robbins
Clean up of awk.h.
29
#define INT32_BIT 32
30
281.1.1 by john haque
Speed/memory performance improvements.
31
extern FILE *output_fp;
32
extern void indent(int indent_level);
33
extern NODE **is_integer(NODE *symbol, NODE *subs);
34
35
/*
36
 * NHAT         ---  maximum size of a leaf array (2^NHAT).
37
 * THRESHOLD    ---  Maximum capacity waste; THRESHOLD >= 2^(NHAT + 1).
38
 */
39
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
40
static int NHAT = 10;
281.1.1 by john haque
Speed/memory performance improvements.
41
static long THRESHOLD;
42
321 by john haque
Polish array handling code.
43
/*
44
 * What is the optimium NHAT ? timing results suggest that 10 is a good choice,
281.1.1 by john haque
Speed/memory performance improvements.
45
 * although differences aren't that significant for > 10.
46
 */
47
48
49
static NODE **cint_array_init(NODE *symbol, NODE *subs);
50
static NODE **is_uinteger(NODE *symbol, NODE *subs);
51
static NODE **cint_lookup(NODE *symbol, NODE *subs);
52
static NODE **cint_exists(NODE *symbol, NODE *subs);
53
static NODE **cint_clear(NODE *symbol, NODE *subs);
54
static NODE **cint_remove(NODE *symbol, NODE *subs);
55
static NODE **cint_list(NODE *symbol, NODE *t);
56
static NODE **cint_copy(NODE *symbol, NODE *newsymb);
57
static NODE **cint_dump(NODE *symbol, NODE *ndump);
58
#ifdef ARRAYDEBUG
59
static void cint_print(NODE *symbol);
60
#endif
61
1051 by Andrew J. Schorr
Use a struct instead of an array to contain the array methods.
62
const array_funcs_t cint_array_func = {
63
	"cint",
281.1.1 by john haque
Speed/memory performance improvements.
64
	cint_array_init,
65
	is_uinteger,
66
	cint_lookup,
67
	cint_exists,
68
	cint_clear,
69
	cint_remove,
70
	cint_list,
71
	cint_copy,
72
	cint_dump,
322 by john haque
Improve array interface.
73
	(afunc_t) 0,
281.1.1 by john haque
Speed/memory performance improvements.
74
};
75
1146.1.22 by Arnold D. Robbins
Improve sandbox mode.
76
77
static NODE **argv_store(NODE *symbol, NODE *subs);
78
79
/* special case for ARGV in sandbox mode */
1472.3.46 by Arnold D. Robbins
Make a number of variables and functions be static.
80
static const array_funcs_t argv_array_func = {
1146.1.22 by Arnold D. Robbins
Improve sandbox mode.
81
	"argv",
82
	cint_array_init,
83
	is_uinteger,
84
	cint_lookup,
85
	cint_exists,
86
	cint_clear,
87
	cint_remove,
88
	cint_list,
89
	cint_copy,
90
	cint_dump,
91
	argv_store,
92
};
93
281.1.1 by john haque
Speed/memory performance improvements.
94
static inline int cint_hash(long k);
95
static inline NODE **cint_find(NODE *symbol, long k, int h1);
96
97
static inline NODE *make_node(NODETYPE type);
98
99
static NODE **tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base);
100
static NODE **tree_exists(NODE *tree, long k);
101
static void tree_clear(NODE *tree);
102
static int tree_remove(NODE *symbol, NODE *tree, long k);
103
static void tree_copy(NODE *newsymb, NODE *tree, NODE *newtree);
322 by john haque
Improve array interface.
104
static long tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind);
281.1.1 by john haque
Speed/memory performance improvements.
105
static inline NODE **tree_find(NODE *tree, long k, int i);
106
static void tree_info(NODE *tree, NODE *ndump, const char *aname);
107
static size_t tree_kilobytes(NODE *tree);
108
#ifdef ARRAYDEBUG
109
static void tree_print(NODE *tree, size_t bi, int indent_level);
110
#endif
111
112
static inline NODE **leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base);
113
static inline NODE **leaf_exists(NODE *array, long k);
114
static void leaf_clear(NODE *array);
115
static int leaf_remove(NODE *symbol, NODE *array, long k);
116
static void leaf_copy(NODE *newsymb, NODE *array, NODE *newarray);
322 by john haque
Improve array interface.
117
static long leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind);
281.1.1 by john haque
Speed/memory performance improvements.
118
static void leaf_info(NODE *array, NODE *ndump, const char *aname);
119
#ifdef ARRAYDEBUG
120
static void leaf_print(NODE *array, size_t bi, int indent_level);
121
#endif
122
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
123
/* powers of 2 table upto 2^30 */
281.1.1 by john haque
Speed/memory performance improvements.
124
static const long power_two_table[] = {
125
	1, 2, 4, 8, 16, 32, 64,
126
	128, 256, 512, 1024, 2048, 4096,
127
	8192, 16384, 32768, 65536, 131072, 262144,
128
	524288, 1048576, 2097152, 4194304, 8388608, 16777216,
129
	33554432, 67108864, 134217728, 268435456, 536870912, 1073741824
130
};
131
132
133
#define ISUINT(a, s)	((((s)->flags & NUMINT) != 0 || is_integer(a, s) != NULL) \
134
                                    && (s)->numbr >= 0)
135
136
/*
137
 * To store 2^n integers, allocate top-level array of size n, elements
138
 * of which are 1-Dimensional (leaf-array) of geometrically increasing
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
139
 * size (power of 2).
281.1.1 by john haque
Speed/memory performance improvements.
140
 *
141
 *  [0]   -->  [ 0 ]
142
 *  [1]   -->  [ 1 ]
143
 *  |2|   -->  [ 2 | 3 ]
144
 *  |3|   -->  [ 4 | 5 | 6 | 7 ]
145
 *  |.|
146
 *  |k|   -->  [ 2^(k - 1)| ...  | 2^k - 1 ]
147
 *  ...
148
 *
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
149
 * For a given integer n (> 0), the leaf-array is at 1 + floor(log2(n)).
281.1.1 by john haque
Speed/memory performance improvements.
150
 *
151
 * The idea for the geometrically increasing array sizes is from:
152
 * 	Fast Functional Lists, Hash-Lists, Deques and Variable Length Arrays.
153
 * 	Bagwell, Phil (2002).
154
 * 	http://infoscience.epfl.ch/record/64410/files/techlists.pdf
155
 *
156
 * Disadvantage:
157
 * Worst case memory waste > 99% and will happen when each of the
158
 * leaf arrays contains only a single element. Even with consecutive
159
 * integers, memory waste can be as high as 50%.
160
 *
161
 * Solution: Hashed Array Trees (HATs).
162
 *
163
 */
164
321 by john haque
Polish array handling code.
165
/* cint_array_init ---  array initialization routine */
281.1.1 by john haque
Speed/memory performance improvements.
166
167
static NODE **
168
cint_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
169
{
321 by john haque
Polish array handling code.
170
	if (symbol == NULL) {
171
		long newval;
408.4.32 by Arnold D. Robbins
Fixes based on problems from a static checker.
172
		size_t nelems = (sizeof(power_two_table) / sizeof(power_two_table[0]));
321 by john haque
Polish array handling code.
173
174
		/* check relevant environment variables */
175
		if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT)
176
			NHAT = newval;
408.4.32 by Arnold D. Robbins
Fixes based on problems from a static checker.
177
		/* don't allow overflow off the end of the table */
1253.1.1 by Andrew J. Schorr
Fix cint off-by-one array bounds overflow check for NHAT set in the environment.
178
		if (NHAT > nelems - 2)
408.4.32 by Arnold D. Robbins
Fixes based on problems from a static checker.
179
			NHAT = nelems - 2;
321 by john haque
Polish array handling code.
180
		THRESHOLD = power_two_table[NHAT + 1];
181
	} else
182
		null_array(symbol);
183
408.19.164 by Arnold D. Robbins
Replace `! NULL' with `& success_node' in array routines.
184
	return & success_node;
281.1.1 by john haque
Speed/memory performance improvements.
185
}
186
187
188
/* is_uinteger --- test if the subscript is an integer >= 0 */
189
190
NODE **
191
is_uinteger(NODE *symbol, NODE *subs)
192
{
193
	if (is_integer(symbol, subs) != NULL && subs->numbr >= 0)
408.19.164 by Arnold D. Robbins
Replace `! NULL' with `& success_node' in array routines.
194
		return & success_node;
281.1.1 by john haque
Speed/memory performance improvements.
195
	return NULL;
196
}
197
198
199
/* cint_lookup --- Find the subscript in the array; Install it if it isn't there. */
200
201
static NODE **
202
cint_lookup(NODE *symbol, NODE *subs)
203
{
204
	NODE **lhs;
205
	long k;
206
	int h1 = -1, m, li;
207
	NODE *tn, *xn;
208
	long cint_size, capacity;
209
210
	k = -1;
211
	if (ISUINT(symbol, subs)) {
212
		k = subs->numbr;	/* k >= 0 */
213
		h1 = cint_hash(k);	/* h1 >= NHAT */
214
		if ((lhs = cint_find(symbol, k, h1)) != NULL)
215
			return lhs;
216
	}
217
	xn = symbol->xarray;
218
	if (xn != NULL && (lhs = xn->aexists(xn, subs)) != NULL)
219
		return lhs;
220
221
	/* It's not there, install it */
222
223
	if (k < 0)
224
		goto xinstall;
225
226
	m = h1 - 1;	/* m >= (NHAT- 1) */
227
228
	/* Estimate capacity upper bound.
229
	 * capacity upper bound = current capacity + leaf array size.
230
	 */
231
	li = m > NHAT ? m : NHAT;
232
	while (li >= NHAT) {
233
		/* leaf-array of a HAT */
234
		li = (li + 1) / 2;
235
	}
236
	capacity = symbol->array_capacity + power_two_table[li];
237
238
	cint_size = (xn == NULL) ? symbol->table_size
239
				: (symbol->table_size - xn->table_size);
240
	assert(cint_size >= 0);
241
	if ((capacity - cint_size) > THRESHOLD)
242
		goto xinstall;
243
244
	if (symbol->nodes == NULL) {
245
		symbol->array_capacity = 0;
246
		assert(symbol->table_size == 0);
247
248
		/* nodes[0] .. nodes[NHAT- 1] not used */
731.9.23 by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro.
249
		ezalloc(symbol->nodes, NODE **, INT32_BIT * sizeof(NODE *), "cint_lookup");
281.1.1 by john haque
Speed/memory performance improvements.
250
	}
251
252
	symbol->table_size++;	/* one more element in array */
253
254
	tn = symbol->nodes[h1];
255
	if (tn == NULL) {
256
		tn = make_node(Node_array_tree);
257
		symbol->nodes[h1] = tn;
258
	}
259
260
	if (m < NHAT)
261
		return tree_lookup(symbol, tn, k, NHAT, 0);
262
	return tree_lookup(symbol, tn, k, m, power_two_table[m]);
263
264
xinstall:
265
266
	symbol->table_size++;
267
	if (xn == NULL) {
268
		xn = symbol->xarray = make_array();
269
		xn->vname = symbol->vname;	/* shallow copy */
270
321 by john haque
Polish array handling code.
271
		/*
272
		 * Avoid using assoc_lookup(xn, subs) which may lead
281.1.1 by john haque
Speed/memory performance improvements.
273
		 * to infinite recursion.
274
		 */
275
276
		if (is_integer(xn, subs))
1051 by Andrew J. Schorr
Use a struct instead of an array to contain the array methods.
277
			xn->array_funcs = & int_array_func;
281.1.1 by john haque
Speed/memory performance improvements.
278
		else
1051 by Andrew J. Schorr
Use a struct instead of an array to contain the array methods.
279
			xn->array_funcs = & str_array_func;
281.1.1 by john haque
Speed/memory performance improvements.
280
		xn->flags |= XARRAY;
281
	}
282
	return xn->alookup(xn, subs);
283
}
284
285
286
/* cint_exists --- test whether an index is in the array or not. */
287
288
static NODE **
289
cint_exists(NODE *symbol, NODE *subs)
290
{
291
	NODE *xn;
292
293
	if (ISUINT(symbol, subs)) {
294
		long k = subs->numbr;
295
		NODE **lhs;
296
		if ((lhs = cint_find(symbol, k, cint_hash(k))) != NULL)
297
			return lhs;
298
	}
299
	if ((xn = symbol->xarray) == NULL)
300
		return NULL;
301
	return xn->aexists(xn, subs);
302
}
303
304
305
/* cint_clear --- flush all the values in symbol[] */
306
307
static NODE **
308
cint_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
309
{
310
	size_t i;
311
	NODE *tn;
312
313
	assert(symbol->nodes != NULL);
314
315
	if (symbol->xarray != NULL) {
316
		NODE *xn = symbol->xarray;
317
		assoc_clear(xn);
318
		freenode(xn);
319
		symbol->xarray = NULL;
320
	}
321
322
	for (i = NHAT; i < INT32_BIT; i++) {
323
		tn = symbol->nodes[i];
324
		if (tn != NULL) {
325
			tree_clear(tn);
326
			freenode(tn);
327
		}
328
	}
329
330
	efree(symbol->nodes);
321 by john haque
Polish array handling code.
331
	symbol->ainit(symbol, NULL);	/* re-initialize symbol */
281.1.1 by john haque
Speed/memory performance improvements.
332
	return NULL;
333
}
334
335
336
/* cint_remove --- remove an index from the array */
337
338
static NODE **
339
cint_remove(NODE *symbol, NODE *subs)
340
{
341
	long k;
342
	int h1;
343
	NODE *tn, *xn = symbol->xarray;
344
291 by Arnold D. Robbins
The grand merge: dgawk and pgawk folded into gawk.
345
	if (symbol->table_size == 0)
346
		return NULL;
347
281.1.1 by john haque
Speed/memory performance improvements.
348
	if (! ISUINT(symbol, subs))
349
		goto xremove;
350
291 by Arnold D. Robbins
The grand merge: dgawk and pgawk folded into gawk.
351
	assert(symbol->nodes != NULL);
352
281.1.1 by john haque
Speed/memory performance improvements.
353
	k = subs->numbr;
354
	h1 = cint_hash(k);
355
	tn = symbol->nodes[h1];
356
	if (tn == NULL || ! tree_remove(symbol, tn, k))
357
		goto xremove;
358
359
	if (tn->table_size == 0) {
360
		freenode(tn);
361
		symbol->nodes[h1] = NULL;
362
	}
363
364
	symbol->table_size--;
365
366
	if (xn == NULL && symbol->table_size == 0) {
367
		efree(symbol->nodes);
321 by john haque
Polish array handling code.
368
		symbol->ainit(symbol, NULL);	/* re-initialize array 'symbol' */
281.1.1 by john haque
Speed/memory performance improvements.
369
	} else if(xn != NULL && symbol->table_size == xn->table_size) {
370
		/* promote xn to symbol */
291 by Arnold D. Robbins
The grand merge: dgawk and pgawk folded into gawk.
371
281.1.1 by john haque
Speed/memory performance improvements.
372
		xn->flags &= ~XARRAY;
373
		xn->parent_array = symbol->parent_array;
374
		efree(symbol->nodes);
375
		*symbol = *xn;
376
		freenode(xn);
377
	}
378
408.19.164 by Arnold D. Robbins
Replace `! NULL' with `& success_node' in array routines.
379
	return & success_node;
281.1.1 by john haque
Speed/memory performance improvements.
380
381
xremove:
382
	xn = symbol->xarray;
383
	if (xn == NULL || xn->aremove(xn, subs) == NULL)
384
		return NULL;
385
	if (xn->table_size == 0) {
386
		freenode(xn);
387
		symbol->xarray = NULL;
388
	}
389
	symbol->table_size--;
390
	assert(symbol->table_size > 0);
391
408.19.164 by Arnold D. Robbins
Replace `! NULL' with `& success_node' in array routines.
392
	return & success_node;
281.1.1 by john haque
Speed/memory performance improvements.
393
}
394
395
396
/* cint_copy --- duplicate input array "symbol" */
397
398
static NODE **
399
cint_copy(NODE *symbol, NODE *newsymb)
400
{
401
	NODE **old, **new;
402
	size_t i;
403
404
	assert(symbol->nodes != NULL);
405
406
	/* allocate new table */
731.9.23 by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro.
407
	ezalloc(new, NODE **, INT32_BIT * sizeof(NODE *), "cint_copy");
281.1.1 by john haque
Speed/memory performance improvements.
408
409
	old = symbol->nodes;
410
	for (i = NHAT; i < INT32_BIT; i++) {
411
		if (old[i] == NULL)
412
			continue;
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
413
		new[i] = make_node(Node_array_tree);
281.1.1 by john haque
Speed/memory performance improvements.
414
		tree_copy(newsymb, old[i], new[i]);
415
	}
416
417
	if (symbol->xarray != NULL) {
418
		NODE *xn, *n;
419
		xn = symbol->xarray;
420
		n = make_array();
421
		n->vname = newsymb->vname;
422
		(void) xn->acopy(xn, n);
423
		newsymb->xarray = n;
424
	} else
425
		newsymb->xarray = NULL;
426
427
	newsymb->nodes = new;
428
	newsymb->table_size = symbol->table_size;
429
	newsymb->array_capacity = symbol->array_capacity;
430
	newsymb->flags = symbol->flags;
431
432
	return NULL;
433
}
434
435
436
/* cint_list --- return a list of items */
437
438
static NODE**
439
cint_list(NODE *symbol, NODE *t)
440
{
441
	NODE **list = NULL;
442
	NODE *tn, *xn;
443
	unsigned long k = 0, num_elems, list_size;
444
	size_t j, ja, jd;
445
	int elem_size = 1;
322 by john haque
Improve array interface.
446
	assoc_kind_t assoc_kind;
281.1.1 by john haque
Speed/memory performance improvements.
447
448
	num_elems = symbol->table_size;
449
	if (num_elems == 0)
450
		return NULL;
322 by john haque
Improve array interface.
451
	assoc_kind = (assoc_kind_t) t->flags;
452
	if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
281.1.1 by john haque
Speed/memory performance improvements.
453
		num_elems = 1;
454
322 by john haque
Improve array interface.
455
	if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
281.1.1 by john haque
Speed/memory performance improvements.
456
		elem_size = 2;
457
	list_size = num_elems * elem_size;
458
459
	if (symbol->xarray != NULL) {
460
		xn = symbol->xarray;
461
		list = xn->alist(xn, t);
462
		assert(list != NULL);
322 by john haque
Improve array interface.
463
		assoc_kind &= ~(AASC|ADESC);
464
		t->flags = (unsigned int) assoc_kind;
281.1.1 by john haque
Speed/memory performance improvements.
465
		if (num_elems == 1 || num_elems == xn->table_size)
466
			return list;
467
		erealloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
468
		k = elem_size * xn->table_size;
469
	} else
470
		emalloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
471
322 by john haque
Improve array interface.
472
	if ((assoc_kind & AINUM) == 0) {
473
		/* not sorting by "index num" */
474
		assoc_kind &= ~(AASC|ADESC);
475
		t->flags = (unsigned int) assoc_kind;
476
	}
281.1.1 by john haque
Speed/memory performance improvements.
477
478
	/* populate it with index in ascending or descending order */
479
480
	for (ja = NHAT, jd = INT32_BIT - 1; ja < INT32_BIT && jd >= NHAT; ) {
333 by Arnold D. Robbins
Merge branch 'master' into array-iface
481
		j = (assoc_kind & ADESC) != 0 ? jd-- : ja++;
281.1.1 by john haque
Speed/memory performance improvements.
482
		tn = symbol->nodes[j];
483
		if (tn == NULL)
484
			continue;
322 by john haque
Improve array interface.
485
		k += tree_list(tn, list + k, assoc_kind);
281.1.1 by john haque
Speed/memory performance improvements.
486
		if (k >= list_size)
487
			return list;
488
	}
489
	return list;
490
}
491
492
493
/* cint_dump --- dump array info */
494
495
static NODE **
496
cint_dump(NODE *symbol, NODE *ndump)
497
{
498
	NODE *tn, *xn = NULL;
499
	int indent_level;
500
	size_t i;
281.1.2 by john haque
Add a test file, cleanup code and update doc.
501
	long cint_size = 0, xsize = 0;
281.1.1 by john haque
Speed/memory performance improvements.
502
	AWKNUM kb = 0;
503
	extern AWKNUM int_kilobytes(NODE *symbol);
281.1.2 by john haque
Add a test file, cleanup code and update doc.
504
	extern AWKNUM str_kilobytes(NODE *symbol);
281.1.1 by john haque
Speed/memory performance improvements.
505
506
	indent_level = ndump->alevel;
507
508
	if (symbol->xarray != NULL) {
509
		xn = symbol->xarray;
281.1.2 by john haque
Add a test file, cleanup code and update doc.
510
		xsize = xn->table_size;
281.1.1 by john haque
Speed/memory performance improvements.
511
	}
281.1.2 by john haque
Add a test file, cleanup code and update doc.
512
	cint_size = symbol->table_size - xsize;
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
513
281.1.1 by john haque
Speed/memory performance improvements.
514
	if ((symbol->flags & XARRAY) == 0)
515
		fprintf(output_fp, "%s `%s'\n",
516
			(symbol->parent_array == NULL) ? "array" : "sub-array",
517
			array_vname(symbol));
518
	indent_level++;
519
	indent(indent_level);
520
	fprintf(output_fp, "array_func: cint_array_func\n");
521
	if (symbol->flags != 0) {
522
		indent(indent_level);
523
		fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
524
	}
525
	indent(indent_level);
526
	fprintf(output_fp, "NHAT: %d\n", NHAT);
527
	indent(indent_level);
528
	fprintf(output_fp, "THRESHOLD: %ld\n", THRESHOLD);
529
	indent(indent_level);
1379.1.24 by Arnold D. Robbins
Fix some printf formatting issues.
530
	fprintf(output_fp, "table_size: %lu (total), %ld (cint), %ld (int + str)\n",
531
				(unsigned long) symbol->table_size, cint_size, xsize);
281.1.1 by john haque
Speed/memory performance improvements.
532
	indent(indent_level);
533
	fprintf(output_fp, "array_capacity: %lu\n", (unsigned long) symbol->array_capacity);
534
	indent(indent_level);
535
	fprintf(output_fp, "Load Factor: %.2g\n", (AWKNUM) cint_size / symbol->array_capacity);
536
537
	for (i = NHAT; i < INT32_BIT; i++) {
538
		tn = symbol->nodes[i];
539
		if (tn == NULL)
540
			continue;
541
		/* Node_array_tree  + HAT */
542
		kb += (sizeof(NODE) + tree_kilobytes(tn)) / 1024.0;
543
	}
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
544
	kb += (INT32_BIT * sizeof(NODE *)) / 1024.0;	/* symbol->nodes */
281.1.1 by john haque
Speed/memory performance improvements.
545
	kb += (symbol->array_capacity * sizeof(NODE *)) / 1024.0;	/* value nodes in Node_array_leaf(s) */
281.1.2 by john haque
Add a test file, cleanup code and update doc.
546
	if (xn != NULL) {
1051 by Andrew J. Schorr
Use a struct instead of an array to contain the array methods.
547
		if (xn->array_funcs == & int_array_func)
281.1.2 by john haque
Add a test file, cleanup code and update doc.
548
			kb += int_kilobytes(xn);
549
		else
550
			kb += str_kilobytes(xn);
551
	}
552
281.1.1 by john haque
Speed/memory performance improvements.
553
	indent(indent_level);
554
	fprintf(output_fp, "memory: %.2g kB (total)\n", kb);
555
556
	/* dump elements */
557
558
	if (ndump->adepth >= 0) {
559
		const char *aname;
560
561
		fprintf(output_fp, "\n");
562
		aname = make_aname(symbol);
563
		for (i = NHAT; i < INT32_BIT; i++) {
564
			tn = symbol->nodes[i];
565
			if (tn != NULL)
566
				tree_info(tn, ndump, aname);
567
		}
568
	}
569
570
	if (xn != NULL) {
571
		fprintf(output_fp, "\n");
572
		xn->adump(xn, ndump);
573
	}
574
575
#ifdef ARRAYDEBUG
576
	if (ndump->adepth < -999)
577
		cint_print(symbol);
578
#endif
579
580
	return NULL;
581
}
582
583
584
/* cint_hash --- locate the HAT for a given number 'k' */
585
586
static inline int
587
cint_hash(long k)
588
{
589
	uint32_t num, r, shift;
590
591
	assert(k >= 0);
592
	if (k == 0)
593
		return NHAT;
594
	num = k;
595
596
	/* Find the Floor(log base 2 of 32-bit integer) */
597
321 by john haque
Polish array handling code.
598
	/*
599
	 * Warren Jr., Henry S. (2002). Hacker's Delight.
281.1.1 by john haque
Speed/memory performance improvements.
600
	 * Addison Wesley. pp. pp. 215. ISBN 978-0201914658.
601
	 *
602
	 *	r = 0;
603
	 *	if (num >= 1<<16) { num >>= 16;	r += 16; }
604
	 *	if (num >= 1<< 8) { num >>=  8;	r +=  8; }
605
	 *	if (num >= 1<< 4) { num >>=  4;	r +=  4; }
606
	 *	if (num >= 1<< 2) { num >>=  2;	r +=  2; }
607
	 *	if (num >= 1<< 1) {		r +=  1; }
608
	 */
609
610
321 by john haque
Polish array handling code.
611
	/*
612
	 * Slightly different code copied from:
281.1.1 by john haque
Speed/memory performance improvements.
613
	 *
614
	 * http://www-graphics.stanford.edu/~seander/bithacks.html
615
	 * Bit Twiddling Hacks
616
	 * By Sean Eron Anderson
617
	 * seander@cs.stanford.edu
618
	 * Individually, the code snippets here are in the public domain
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
619
	 * (unless otherwise noted) --- feel free to use them however you please.
620
	 * The aggregate collection and descriptions are (C) 1997-2005
281.1.1 by john haque
Speed/memory performance improvements.
621
	 * Sean Eron Anderson. The code and descriptions are distributed in the
622
	 * hope that they will be useful, but WITHOUT ANY WARRANTY and without
623
	 * even the implied warranty of merchantability or fitness for a particular
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
624
	 * purpose.
281.1.1 by john haque
Speed/memory performance improvements.
625
	 *
626
	 */
627
628
	r = (num > 0xFFFF) << 4; num >>= r;
629
	shift = (num > 0xFF) << 3; num >>= shift; r |= shift;
630
	shift = (num > 0x0F) << 2; num >>= shift; r |= shift;
631
	shift = (num > 0x03) << 1; num >>= shift; r |= shift;
632
	r |= (num >> 1);
633
634
	/* We use a single HAT for 0 <= num < 2^NHAT */
635
	if (r < NHAT)
636
		return NHAT;
637
638
	return (1 + r);
639
}
640
641
642
/* cint_find --- locate the integer subscript */
643
644
static inline NODE **
645
cint_find(NODE *symbol, long k, int h1)
646
{
647
	NODE *tn;
648
649
	if (symbol->nodes == NULL || (tn = symbol->nodes[h1]) == NULL)
650
		return NULL;
651
	return tree_exists(tn, k);
652
}
653
654
655
#ifdef ARRAYDEBUG
656
657
/* cint_print --- print structural info */
658
659
static void
660
cint_print(NODE *symbol)
661
{
662
	NODE *tn;
663
	size_t i;
664
665
	fprintf(output_fp, "I[%4lu:%-4lu]\n", (unsigned long) INT32_BIT,
666
				(unsigned long) symbol->table_size);
667
	for (i = NHAT; i < INT32_BIT; i++) {
668
		tn = symbol->nodes[i];
669
		if (tn == NULL)
670
			continue;
671
		tree_print(tn, i, 1);
672
	}
673
}
674
675
#endif
676
677
678
/*------------------------ Hashed Array Trees -----------------------------*/
679
680
/*
681
 * HATs: Hashed Array Trees
682
 * Fast variable-length arrays
683
 * Edward Sitarski
684
 * http://www.drdobbs.com/architecture-and-design/184409965
685
 *
686
 *  HAT has a top-level array containing a power of two
687
 *  number of leaf arrays. All leaf arrays are the same size as the
688
 *  top-level array. A full HAT can hold n^2 elements,
689
 *  where n (some power of 2) is the size of each leaf array.
690
 *  [i/n][i & (n - 1)] locates the `i th' element in a HAT.
691
 *
692
 */
693
694
/*
695
 *  A half HAT is defined here as a HAT with a top-level array of size n^2/2
696
 *  and holds the first n^2/2 elements.
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
697
 *
281.1.1 by john haque
Speed/memory performance improvements.
698
 *   1. 2^8 elements can be stored in a full HAT of size 2^4.
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
699
 *   2. 2^9 elements can be stored in a half HAT of size 2^5.
281.1.1 by john haque
Speed/memory performance improvements.
700
 *   3. When the number of elements is some power of 2, it
701
 *      can be stored in a full or a half HAT.
702
 *   4. When the number of elements is some power of 2, it
703
 *      can be stored in a HAT (full or half) with HATs as leaf elements
704
 *      (full or half),  and so on (e.g. 2^8 elements in a HAT of size 2^4 (top-level
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
705
 *      array dimension) with each leaf array being a HAT of size 2^2).
281.1.1 by john haque
Speed/memory performance improvements.
706
 *
707
 *  IMPLEMENTATION DETAILS:
708
 *    1. A HAT of 2^12 elements needs 2^6 house-keeping NODEs
709
 *       of Node_array_leaf.
710
 *
711
 *    2. A HAT of HATS of 2^12 elements needs
712
 *       2^6 * (1 Node_array_tree + 2^3 Node_array_leaf)
713
 *       ~ 2^9 house-keeping NODEs.
714
 *
715
 *    3. When a leaf array (or leaf HAT) becomes empty, the memory
716
 *       is deallocated, and when there is no leaf array (or leaf HAT) left,
717
 *       the HAT is deleted.
718
 *
719
 *    4. A HAT stores the base (first) element, and locates the leaf array/HAT
720
 *       for the `i th' element using integer division
721
 *       (i - base)/n where n is the size of the top-level array.
722
 *
723
 */
724
725
/* make_node --- initialize a NODE */
726
727
static inline NODE *
728
make_node(NODETYPE type)
729
{
730
	NODE *n;
731
	getnode(n);
732
	memset(n, '\0', sizeof(NODE));
733
	n->type = type;
734
	return n;
735
}
736
737
738
/* tree_lookup --- Find an integer subscript in a HAT; Install it if it isn't there */
739
740
static NODE **
741
tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base)
742
{
743
	NODE **lhs;
744
	NODE *tn;
745
	int i, n;
746
	size_t size;
747
	long num = k;
748
749
	/*
750
	 * HAT size (size of Top & Leaf array) = 2^n
751
	 * where n = Floor ((m + 1)/2). For an odd value of m,
752
	 * only the first half of the HAT is needed.
753
	 */
754
755
	n = (m + 1) / 2;
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
756
281.1.1 by john haque
Speed/memory performance improvements.
757
	if (tree->table_size == 0) {
758
		size_t actual_size;
759
		NODE **table;
760
761
		assert(tree->nodes == NULL);
762
763
		/* initialize top-level array */
764
		size = actual_size = power_two_table[n];
765
		tree->array_base = base;
766
		tree->array_size = size;
767
		tree->table_size = 0;	/* # of elements in the array */
768
		if (n > m/2) {
769
			/* only first half of the array used */
770
			actual_size /= 2;
771
			tree->flags |= HALFHAT;
772
		}
731.9.23 by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro.
773
		ezalloc(table, NODE **, actual_size * sizeof(NODE *), "tree_lookup");
281.1.1 by john haque
Speed/memory performance improvements.
774
		tree->nodes = table;
775
	} else
776
		size = tree->array_size;
777
778
	num -= tree->array_base;
779
	i = num / size;	/* top-level array index */
780
	assert(i >= 0);
781
782
	if ((lhs = tree_find(tree, k, i)) != NULL)
783
		return lhs;
784
785
	/* It's not there, install it */
786
787
	tree->table_size++;
788
	base += (size * i);
789
	tn = tree->nodes[i];
790
	if (n > NHAT) {
791
		if (tn == NULL)
792
			tn = tree->nodes[i] = make_node(Node_array_tree);
793
		return tree_lookup(symbol, tn, k, n, base);
794
	} else {
795
		if (tn == NULL)
796
			tn = tree->nodes[i] = make_node(Node_array_leaf);
797
		return leaf_lookup(symbol, tn, k, size, base);
798
	}
799
}
800
801
802
/* tree_exists --- test whether integer subscript `k' exists or not */
803
804
static NODE **
805
tree_exists(NODE *tree, long k)
806
{
807
	int i;
808
	NODE *tn;
809
810
	i = (k - tree->array_base) / tree->array_size;
811
	assert(i >= 0);
812
	tn = tree->nodes[i];
813
	if (tn == NULL)
814
		return NULL;
815
	if (tn->type == Node_array_tree)
816
		return tree_exists(tn, k);
817
	return leaf_exists(tn, k);
818
}
819
820
/* tree_clear --- flush all the values */
821
822
static void
823
tree_clear(NODE *tree)
824
{
825
	NODE *tn;
826
	size_t	j, hsize;
827
828
	hsize = tree->array_size;
829
	if ((tree->flags & HALFHAT) != 0)
830
		hsize /= 2;
831
832
	for (j = 0; j < hsize; j++) {
833
		tn = tree->nodes[j];
834
		if (tn == NULL)
835
			continue;
836
		if (tn->type == Node_array_tree)
837
			tree_clear(tn);
838
		else
839
			leaf_clear(tn);
840
		freenode(tn);
841
	}
842
843
	efree(tree->nodes);
844
	memset(tree, '\0', sizeof(NODE));
845
	tree->type = Node_array_tree;
846
}
847
848
849
/* tree_remove --- If the integer subscript is in the HAT, remove it */
850
851
static int
852
tree_remove(NODE *symbol, NODE *tree, long k)
853
{
854
	int i;
855
	NODE *tn;
856
857
	i = (k - tree->array_base) / tree->array_size;
858
	assert(i >= 0);
859
	tn = tree->nodes[i];
860
	if (tn == NULL)
319.1.9 by Arnold D. Robbins
Move to use of bool type, true, false, everywhere.
861
		return false;
281.1.1 by john haque
Speed/memory performance improvements.
862
863
	if (tn->type == Node_array_tree
864
			&& ! tree_remove(symbol, tn, k))
319.1.9 by Arnold D. Robbins
Move to use of bool type, true, false, everywhere.
865
		return false;
281.1.1 by john haque
Speed/memory performance improvements.
866
	else if (tn->type == Node_array_leaf
867
			&& ! leaf_remove(symbol, tn, k))
319.1.9 by Arnold D. Robbins
Move to use of bool type, true, false, everywhere.
868
		return false;
281.1.1 by john haque
Speed/memory performance improvements.
869
870
	if (tn->table_size == 0) {
871
		freenode(tn);
872
		tree->nodes[i] = NULL;
873
	}
874
875
	/* one less item in array */
876
	if (--tree->table_size == 0) {
877
		efree(tree->nodes);
878
		memset(tree, '\0', sizeof(NODE));
879
		tree->type = Node_array_tree;
880
	}
319.1.9 by Arnold D. Robbins
Move to use of bool type, true, false, everywhere.
881
	return true;
281.1.1 by john haque
Speed/memory performance improvements.
882
}
883
884
885
/* tree_find --- locate an interger subscript in the HAT */
886
887
static inline NODE **
888
tree_find(NODE *tree, long k, int i)
889
{
890
	NODE *tn;
891
892
	assert(tree->nodes != NULL);
893
	tn = tree->nodes[i];
894
	if (tn != NULL) {
895
		if (tn->type == Node_array_tree)
896
			return tree_exists(tn, k);
897
		return leaf_exists(tn, k);
898
	}
899
	return NULL;
900
}
901
902
903
/* tree_list --- return a list of items in the HAT */
904
905
static long
322 by john haque
Improve array interface.
906
tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind)
281.1.1 by john haque
Speed/memory performance improvements.
907
{
908
	NODE *tn;
909
	size_t j, cj, hsize;
910
	long k = 0;
911
912
	assert(list != NULL);
913
914
	hsize = tree->array_size;
915
	if ((tree->flags & HALFHAT) != 0)
916
		hsize /= 2;
917
918
	for (j = 0; j < hsize; j++) {
333 by Arnold D. Robbins
Merge branch 'master' into array-iface
919
		cj = (assoc_kind & ADESC) != 0 ? (hsize - 1 - j) : j;
281.1.1 by john haque
Speed/memory performance improvements.
920
		tn = tree->nodes[cj];
921
		if (tn == NULL)
922
			continue;
923
		if (tn->type == Node_array_tree)
322 by john haque
Improve array interface.
924
			k += tree_list(tn, list + k, assoc_kind);
281.1.1 by john haque
Speed/memory performance improvements.
925
		else
322 by john haque
Improve array interface.
926
			k += leaf_list(tn, list + k, assoc_kind);
927
		if ((assoc_kind & ADELETE) != 0 && k >= 1)
281.1.1 by john haque
Speed/memory performance improvements.
928
			return k;
929
	}
930
	return k;
931
}
932
933
934
/* tree_copy --- duplicate a HAT */
935
936
static void
937
tree_copy(NODE *newsymb, NODE *tree, NODE *newtree)
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
938
{
281.1.1 by john haque
Speed/memory performance improvements.
939
	NODE **old, **new;
940
	size_t j, hsize;
941
942
	hsize = tree->array_size;
943
	if ((tree->flags & HALFHAT) != 0)
944
		hsize /= 2;
945
731.9.23 by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro.
946
	ezalloc(new, NODE **, hsize * sizeof(NODE *), "tree_copy");
281.1.1 by john haque
Speed/memory performance improvements.
947
	newtree->nodes = new;
948
	newtree->array_base = tree->array_base;
949
	newtree->array_size = tree->array_size;
950
	newtree->table_size = tree->table_size;
951
	newtree->flags = tree->flags;
952
953
	old = tree->nodes;
954
	for (j = 0; j < hsize; j++) {
955
		if (old[j] == NULL)
956
			continue;
957
		if (old[j]->type == Node_array_tree) {
958
			new[j] = make_node(Node_array_tree);
959
			tree_copy(newsymb, old[j], new[j]);
960
		} else {
961
			new[j] = make_node(Node_array_leaf);
962
			leaf_copy(newsymb, old[j], new[j]);
963
		}
964
	}
965
}
966
967
968
/* tree_info --- print index, value info */
969
970
static void
971
tree_info(NODE *tree, NODE *ndump, const char *aname)
972
{
973
	NODE *tn;
974
	size_t j, hsize;
975
976
	hsize = tree->array_size;
977
	if ((tree->flags & HALFHAT) != 0)
978
		hsize /= 2;
979
980
	for (j = 0; j < hsize; j++) {
981
		tn = tree->nodes[j];
982
		if (tn == NULL)
983
			continue;
984
		if (tn->type == Node_array_tree)
985
			tree_info(tn, ndump, aname);
986
		else
987
			leaf_info(tn, ndump, aname);
988
	}
989
}
990
991
992
/* tree_kilobytes --- calculate memory consumption of a HAT */
993
994
static size_t
995
tree_kilobytes(NODE *tree)
996
{
997
	NODE *tn;
998
	size_t j, hsize;
999
	size_t sz = 0;
1000
1001
	hsize = tree->array_size;
1002
	if ((tree->flags & HALFHAT) != 0)
1003
		hsize /= 2;
1004
	for (j = 0; j < hsize; j++) {
1005
		tn = tree->nodes[j];
1006
		if (tn == NULL)
1007
			continue;
1008
		sz += sizeof(NODE);	/* Node_array_tree or Node_array_leaf */
1009
		if (tn->type == Node_array_tree)
1010
			sz += tree_kilobytes(tn);
1011
	}
1012
	sz += hsize * sizeof(NODE *);	/* tree->nodes */
1013
	return sz;
1014
}
1015
1016
#ifdef ARRAYDEBUG
1017
1018
/* tree_print --- print the HAT structures */
1019
1020
static void
1021
tree_print(NODE *tree, size_t bi, int indent_level)
1022
{
1023
	NODE *tn;
1024
	size_t j, hsize;
1025
1026
	indent(indent_level);
1027
1028
	hsize = tree->array_size;
1029
	if ((tree->flags & HALFHAT) != 0)
1030
		hsize /= 2;
319.2.21 by Arnold D. Robbins
Minor fixes for printf compile warnings.
1031
	fprintf(output_fp, "%4lu:%s[%4lu:%-4lu]\n",
1032
			(unsigned long) bi,
319.1.140 by Arnold D. Robbins
Make bitflag checking consistent everywhere.
1033
			(tree->flags & HALFHAT) != 0 ? "HH" : "H",
281.1.1 by john haque
Speed/memory performance improvements.
1034
			(unsigned long) hsize, (unsigned long) tree->table_size);
1035
1036
	for (j = 0; j < hsize; j++) {
1037
		tn = tree->nodes[j];
1038
		if (tn == NULL)
1039
			continue;
1040
		if (tn->type == Node_array_tree)
1041
			tree_print(tn, j, indent_level + 1);
1042
		else
1043
			leaf_print(tn, j, indent_level + 1);
1044
	}
1045
}
1046
#endif
1047
1048
/*--------------------- leaf (linear 1-D) array --------------------*/
1049
321 by john haque
Polish array handling code.
1050
/*
1051
 * leaf_lookup --- find an integer subscript in the array; Install it if
1052
 *	it isn't there.
1053
 */
281.1.1 by john haque
Speed/memory performance improvements.
1054
1055
static inline NODE **
1056
leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base)
1057
{
1058
	NODE **lhs;
1059
1060
	if (array->nodes == NULL) {
1061
		array->table_size = 0;	/* sanity */
1062
		array->array_size = size;
1063
		array->array_base = base;
731.9.23 by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro.
1064
		ezalloc(array->nodes, NODE **, size * sizeof(NODE *), "leaf_lookup");
281.1.1 by john haque
Speed/memory performance improvements.
1065
		symbol->array_capacity += size;
1066
	}
1067
1068
	lhs = array->nodes + (k - base); /* leaf element */
1069
	if (*lhs == NULL) {
1070
		array->table_size++;	/* one more element in leaf array */
1720.1.27 by Arnold D. Robbins
Squashed commit of the following:
1071
		*lhs = new_array_element();
281.1.1 by john haque
Speed/memory performance improvements.
1072
	}
1073
	return lhs;
1074
}
1075
1076
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
1077
/* leaf_exists --- check if the array contains an integer subscript */
281.1.1 by john haque
Speed/memory performance improvements.
1078
1079
static inline NODE **
1080
leaf_exists(NODE *array, long k)
1081
{
1082
	NODE **lhs;
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
1083
	lhs = array->nodes + (k - array->array_base);
281.1.1 by john haque
Speed/memory performance improvements.
1084
	return (*lhs != NULL) ? lhs : NULL;
1085
}
1086
1087
1088
/* leaf_clear --- flush all values in the array */
1089
1090
static void
1091
leaf_clear(NODE *array)
1092
{
1093
	long i, size = array->array_size;
1094
	NODE *r;
1095
1096
	for (i = 0; i < size; i++) {
1097
		r = array->nodes[i];
1098
		if (r == NULL)
1099
			continue;
1100
		if (r->type == Node_var_array) {
1101
			assoc_clear(r);		/* recursively clear all sub-arrays */
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
1102
			efree(r->vname);
281.1.1 by john haque
Speed/memory performance improvements.
1103
			freenode(r);
1104
		} else
1105
			unref(r);
1106
	}
1107
	efree(array->nodes);
1108
	array->nodes = NULL;
1109
	array->array_size = array->table_size = 0;
1110
}
1111
1112
1113
/* leaf_remove --- remove an integer subscript from the array */
1114
1115
static int
1116
leaf_remove(NODE *symbol, NODE *array, long k)
1117
{
1118
	NODE **lhs;
1119
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
1120
	lhs = array->nodes + (k - array->array_base);
281.1.1 by john haque
Speed/memory performance improvements.
1121
	if (*lhs == NULL)
319.1.9 by Arnold D. Robbins
Move to use of bool type, true, false, everywhere.
1122
		return false;
281.1.1 by john haque
Speed/memory performance improvements.
1123
	*lhs = NULL;
1124
	if (--array->table_size == 0) {
1125
		efree(array->nodes);
1126
		array->nodes = NULL;
1127
		symbol->array_capacity -= array->array_size;
1128
		array->array_size = 0;	/* sanity */
1129
	}
319.1.9 by Arnold D. Robbins
Move to use of bool type, true, false, everywhere.
1130
	return true;
281.1.1 by john haque
Speed/memory performance improvements.
1131
}
1132
1133
1134
/* leaf_copy --- duplicate a leaf array */
1135
1136
static void
1137
leaf_copy(NODE *newsymb, NODE *array, NODE *newarray)
1138
{
1139
	NODE **old, **new;
1140
	long size, i;
1141
1142
	size = array->array_size;
731.9.23 by Andrew J. Schorr
Replace malloc+memset with calloc, mostly by using the new ezalloc macro.
1143
	ezalloc(new, NODE **, size * sizeof(NODE *), "leaf_copy");
281.1.1 by john haque
Speed/memory performance improvements.
1144
	newarray->nodes = new;
1145
	newarray->array_size = size;
1146
	newarray->array_base = array->array_base;
1147
	newarray->flags = array->flags;
1148
	newarray->table_size = array->table_size;
1149
1150
	old = array->nodes;
1151
	for (i = 0; i < size; i++) {
1152
		if (old[i] == NULL)
1153
			continue;
1154
		if (old[i]->type == Node_val)
1155
			new[i] = dupnode(old[i]);
1156
		else {
1157
			NODE *r;
1158
			r = make_array();
1159
			r->vname = estrdup(old[i]->vname, strlen(old[i]->vname));
1160
			r->parent_array = newsymb;
1161
			new[i] = assoc_copy(old[i], r);
1162
		}
1163
	}
1164
}
1165
1166
1167
/* leaf_list --- return a list of items */
1168
1169
static long
322 by john haque
Improve array interface.
1170
leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind)
281.1.1 by john haque
Speed/memory performance improvements.
1171
{
1172
	NODE *r, *subs;
1173
	long num, i, ci, k = 0;
1174
	long size = array->array_size;
1175
	static char buf[100];
1176
1177
	for (i = 0; i < size; i++) {
333 by Arnold D. Robbins
Merge branch 'master' into array-iface
1178
		ci = (assoc_kind & ADESC) != 0 ? (size - 1 - i) : i;
281.1.1 by john haque
Speed/memory performance improvements.
1179
		r = array->nodes[ci];
1180
		if (r == NULL)
1181
			continue;
1182
1183
		/* index */
1184
		num = array->array_base + ci;
333 by Arnold D. Robbins
Merge branch 'master' into array-iface
1185
		if ((assoc_kind & AISTR) != 0) {
408.26.83 by Arnold D. Robbins
Remove trailing whitespace everywhere. Fix Unicode into ASCII.
1186
			sprintf(buf, "%ld", num);
281.1.1 by john haque
Speed/memory performance improvements.
1187
			subs = make_string(buf, strlen(buf));
1188
			subs->numbr = num;
1189
			subs->flags |= (NUMCUR|NUMINT);
1190
		} else {
1191
			subs = make_number((AWKNUM) num);
1192
			subs->flags |= (INTIND|NUMINT);
1193
		}
1194
		list[k++] = subs;
1195
1196
		/* value */
333 by Arnold D. Robbins
Merge branch 'master' into array-iface
1197
		if ((assoc_kind & AVALUE) != 0) {
281.1.1 by john haque
Speed/memory performance improvements.
1198
			if (r->type == Node_val) {
322 by john haque
Improve array interface.
1199
				if ((assoc_kind & AVNUM) != 0)
281.1.1 by john haque
Speed/memory performance improvements.
1200
					(void) force_number(r);
322 by john haque
Improve array interface.
1201
				else if ((assoc_kind & AVSTR) != 0)
281.1.1 by john haque
Speed/memory performance improvements.
1202
					r = force_string(r);
1203
			}
1204
			list[k++] = r;
1205
		}
322 by john haque
Improve array interface.
1206
		if ((assoc_kind & ADELETE) != 0 && k >= 1)
281.1.1 by john haque
Speed/memory performance improvements.
1207
			return k;
1208
	}
1209
1210
	return k;
1211
}
1212
1213
1214
/* leaf_info --- print index, value info */
1215
1216
static void
1217
leaf_info(NODE *array, NODE *ndump, const char *aname)
1218
{
1219
	NODE *subs, *val;
1220
	size_t i, size;
1221
1222
	size = array->array_size;
1223
1224
	subs = make_number((AWKNUM) 0.0);
1225
	subs->flags |= (INTIND|NUMINT);
1226
	for (i = 0; i < size; i++) {
1227
		val = array->nodes[i];
1228
		if (val == NULL)
1229
			continue;
1230
		subs->numbr = array->array_base + i;
1231
		assoc_info(subs, val, ndump, aname);
1232
	}
1233
	unref(subs);
1234
}
1235
1236
#ifdef ARRAYDEBUG
1237
1238
/* leaf_print --- print the leaf-array structure */
1239
1240
1241
static void
1242
leaf_print(NODE *array, size_t bi, int indent_level)
1243
{
1244
	indent(indent_level);
319.2.21 by Arnold D. Robbins
Minor fixes for printf compile warnings.
1245
	fprintf(output_fp, "%4lu:L[%4lu:%-4lu]\n",
1246
			(unsigned long) bi,
281.1.1 by john haque
Speed/memory performance improvements.
1247
			(unsigned long) array->array_size,
1248
			(unsigned long) array->table_size);
1249
}
1250
#endif
1146.1.22 by Arnold D. Robbins
Improve sandbox mode.
1251
1252
static NODE *argv_shadow_array = NULL;
1253
1254
/* argv_store --- post assign function for ARGV in sandbox mode */
1255
1256
static NODE **
1257
argv_store(NODE *symbol, NODE *subs)
1258
{
1259
	NODE **val = cint_exists(symbol, subs);
1260
	NODE *newval = *val;
1261
	char *cp;
1262
1263
	if (newval->stlen == 0)	// empty strings in ARGV are OK
1264
		return val;
1265
1266
	if ((cp = strchr(newval->stptr, '=')) == NULL) {
1267
		if (! in_array(argv_shadow_array, newval))
1268
			fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"),
1269
				(int) newval->stlen, newval->stptr);
1270
	} else {
1271
		// check if it's a valid variable assignment
1272
		bool badvar = false;
1273
		char *arg = newval->stptr;
1274
		char *cp2;
1275
1276
		*cp = '\0';	// temporarily
1277
1278
		if (! is_letter((unsigned char) arg[0]))
1279
			badvar = true;
1280
		else
1281
			for (cp2 = arg+1; *cp2; cp2++)
1282
				if (! is_identchar((unsigned char) *cp2) && *cp2 != ':') {
1283
					badvar = true;
1284
					break;
1285
				}
1286
1287
		// further checks
1288
		if (! badvar) {
1289
			char *cp = strchr(arg, ':');
1290
			if (cp && (cp[1] != ':' || strchr(cp + 2, ':') != NULL))
1291
				badvar = true;
1292
		}
1293
		*cp = '=';	// restore the '='
1294
1295
		if (badvar && ! in_array(argv_shadow_array, newval))
1296
			fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"),
1297
				(int) newval->stlen, newval->stptr);
1298
1299
		// otherwise, badvar is false, let it through as variable assignment
1300
	}
1301
	return val;
1302
}
1303
1304
/* init_argv_array --- set up the pointers for ARGV in sandbox mode. A bit hacky. */
1305
1306
void
1307
init_argv_array(NODE *argv_node, NODE *shadow_node)
1308
{
1309
	/* If POSIX simply don't reset the vtable and things work as before */
1310
	if (! do_sandbox)
1311
		return;
1312
1313
	argv_node->array_funcs = & argv_array_func;
1314
	argv_shadow_array = shadow_node;
1315
}