28
extern void assoc_clear();
29
extern int a_get_three();
32
static char *get_fs();
33
static int re_split();
34
static int parse_fields();
35
static void set_element();
37
char *line_buf = NULL; /* holds current input line */
29
static int (*parse_field) P((int, char **, int, char *,
30
Regexp *, void (*)(), NODE *));
31
static void rebuild_record P((void));
32
static int re_parse_field P((int, char **, int, char *,
33
Regexp *, void (*)(), NODE *));
34
static int def_parse_field P((int, char **, int, char *,
35
Regexp *, void (*)(), NODE *));
36
static int sc_parse_field P((int, char **, int, char *,
37
Regexp *, void (*)(), NODE *));
38
static int fw_parse_field P((int, char **, int, char *,
39
Regexp *, void (*)(), NODE *));
40
static void set_element P((int, char *, int, NODE *));
42
static Regexp *FS_regexp = NULL;
39
43
static char *parse_extent; /* marks where to restart parse of record */
40
44
static int parse_high_water=0; /* field number that we have parsed so far */
41
static char f_empty[] = "";
42
static char *save_fs = " "; /* save current value of FS when line is read,
45
static int nf_high_water = 0; /* size of fields_arr */
46
static char f_empty[] = "\0";
49
static char *save_fs; /* save current value of FS when line is read,
43
50
* to be used in deferred parsing
47
53
NODE **fields_arr; /* array of pointers to the field nodes */
48
NODE node0; /* node for $0 which never gets free'd */
49
int node0_valid = 1; /* $(>0) has not been changed yet */
54
int field0_valid = 1; /* $(>0) has not been changed yet */
56
static NODE **nodes; /* permanent repository of field nodes */
57
static int *FIELDWIDTHS = NULL;
54
62
emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
55
node0.type = Node_val;
58
node0.flags = (STR|PERM); /* never free buf */
59
fields_arr[0] = &node0;
63
* Danger! Must only be called for fields we know have just been blanked, or
64
* fields we know don't exist yet.
63
emalloc(nodes, NODE **, sizeof(NODE *), "init_fields");
64
emalloc(field0, NODE *, sizeof(NODE), "init_fields");
65
field0->type = Node_val;
68
field0->flags = (STRING|STR|PERM); /* never free buf */
69
fields_arr[0] = field0;
70
save_FS = dupnode(FS_node->var_value);
71
save_fs = save_FS->stptr;
81
erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
82
erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "set_field");
83
for (t = nf_high_water+1; t <= num; t++) {
87
fields_arr[t] = nodes[t];
143
162
* or to NF. At that point, parse only as much as necessary.
165
set_record(buf, cnt, freeold)
152
assign_number(&NF_node->var_value, (AWKNUM)-1);
153
173
for (i = 1; i <= parse_high_water; i++) {
154
deref = fields_arr[i];
174
unref(fields_arr[i]);
157
176
parse_high_water = 0;
159
if (buf == line_buf) {
160
deref = fields_arr[0];
163
node0.type = Node_val;
167
node0.flags = (STR|PERM); /* never free buf */
168
fields_arr[0] = &node0;
178
unref(fields_arr[0]);
182
save_FS = dupnode(FS_node->var_value);
183
save_fs = save_FS->stptr;
188
field0->flags = (STRING|STR|PERM|MAYBE_NUM);
189
fields_arr[0] = field0;
191
fields_arr[0]->flags |= MAYBE_NUM;
198
(void) force_string(fields_arr[0]);
199
set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, 0);
205
NF = (int) force_number(NF_node->var_value);
210
* this is called both from get_field() and from do_split()
211
* via (*parse_field)(). This variation is for when FS is a regular
212
* expression -- either user-defined or because RS=="" and FS==" "
215
re_parse_field(up_to, buf, len, fs, rp, set, n)
216
int up_to; /* parse only up to this field number */
217
char **buf; /* on input: string to parse; on output: point to start next */
221
void (*set) (); /* routine to set the value of the parsed field */
224
register char *scan = *buf;
225
register int nf = parse_high_water;
226
register char *field;
227
register char *end = scan + len;
235
cp = FS_node->var_value->stptr;
236
if (*RS == 0 && *cp == ' ' && *(cp+1) == '\0') {
238
&& (*scan == '\n' || *scan == ' ' || *scan == '\t'))
243
&& research(rp, scan, (int)(end - scan), 1) != -1
245
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
248
(*set)(++nf, field, scan - field, n);
254
(*set)(++nf, field, RESTART(rp, scan), n);
255
scan += REEND(rp, scan);
258
if (nf != up_to && *RS != 0 && scan < end) {
259
(*set)(++nf, scan, (int)(end - scan), n);
267
* this is called both from get_field() and from do_split()
268
* via (*parse_field)(). This variation is for when FS is a single space
272
def_parse_field(up_to, buf, len, fs, rp, set, n)
273
int up_to; /* parse only up to this field number */
274
char **buf; /* on input: string to parse; on output: point to start next */
278
void (*set) (); /* routine to set the value of the parsed field */
281
register char *scan = *buf;
282
register int nf = parse_high_water;
283
register char *field;
284
register char *end = scan + len;
291
*end = ' '; /* sentinel character */
292
for (; nf < up_to; scan++) {
294
* special case: fs is single space, strip leading whitespace
296
while (scan < end && (*scan == ' ' || *scan == '\t'))
301
while (*scan != ' ' && *scan != '\t')
303
(*set)(++nf, field, (int)(scan - field), n);
312
* this is called both from get_field() and from do_split()
313
* via (*parse_field)(). This variation is for when FS is a single character
317
sc_parse_field(up_to, buf, len, fs, rp, set, n)
318
int up_to; /* parse only up to this field number */
319
char **buf; /* on input: string to parse; on output: point to start next */
323
void (*set) (); /* routine to set the value of the parsed field */
326
register char *scan = *buf;
327
register char fschar = *fs;
328
register int nf = parse_high_water;
329
register char *field;
330
register char *end = scan + len;
336
*end = fschar; /* sentinel character */
337
for (; nf < up_to; scan++) {
339
while (*scan++ != fschar)
342
(*set)(++nf, field, (int)(scan - field), n);
351
* this is called both from get_field() and from do_split()
352
* via (*parse_field)(). This variation is for when fields are fixed widths (FIELDWIDTHS)
356
fw_parse_field(up_to, buf, len, fs, rp, set, n)
357
int up_to; /* parse only up to this field number */
358
char **buf; /* on input: string to parse; on output: point to start next */
362
void (*set) (); /* routine to set the value of the parsed field */
365
register char *scan = *buf;
366
register int nf = parse_high_water;
367
register char *end = scan + len;
373
for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) {
374
if (len > end - scan)
376
(*set)(++nf, scan, len, n);
173
387
get_field(num, assign)
175
int assign; /* this field is on the LHS of an assign */
389
Func_ptr *assign; /* this field is on the LHS of an assign */
180
394
* if requesting whole line but some other field has been altered,
181
395
* then the whole line must be rebuilt
183
if (num == 0 && (node0_valid == 0 || assign)) {
184
/* first, parse remainder of input record */
185
if (NF_node->var_value->numbr == -1) {
186
if (parse_high_water == 0)
187
parse_extent = node0.stptr;
188
n = parse_fields(HUGE-1, &parse_extent,
189
node0.stlen - (parse_extent - node0.stptr),
190
save_fs, set_field, (NODE *)NULL);
191
assign_number(&NF_node->var_value, (AWKNUM)n);
193
if (node0_valid == 0)
399
/* first, parse remainder of input record */
401
NF = (*parse_field)(HUGE-1, &parse_extent,
402
fields_arr[0]->stlen -
403
(parse_extent - fields_arr[0]->stptr),
404
save_fs, FS_regexp, set_field,
406
parse_high_water = NF;
194
408
rebuild_record();
411
*assign = reset_record;
195
412
return &fields_arr[0];
197
if (num > 0 && assign)
415
/* assert(num > 0); */
199
419
if (num <= parse_high_water) /* we have already parsed this field */
200
420
return &fields_arr[num];
201
if (parse_high_water == 0 && num > 0) /* starting at the beginning */
421
if (parse_high_water == 0) /* starting at the beginning */
202
422
parse_extent = fields_arr[0]->stptr;
204
424
* parse up to num fields, calling set_field() for each, and saving
205
425
* in parse_extent the point where the parse left off
207
n = parse_fields(num, &parse_extent,
427
n = (*parse_field)(num, &parse_extent,
208
428
fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr),
209
save_fs, set_field, (NODE *)NULL);
429
save_fs, FS_regexp, set_field, (NODE *)NULL);
430
parse_high_water = n;
210
431
if (num == HUGE-1)
212
if (n < num) { /* requested field number beyond end of record;
213
* set_field will just extend the number of fields,
216
set_field(num, f_empty, 0, (NODE *) NULL);
433
if (n < num) { /* requested field number beyond end of record; */
436
if (num > nf_high_water)
437
grow_fields_arr(num);
439
/* fill in fields that don't exist */
440
for (i = n + 1; i <= num; i++)
441
fields_arr[i] = Nnull_string;
218
444
* if this field is on the LHS of an assignment, then we want to
219
445
* set NF to this value, below
229
455
* only gets set if the field is assigned to -- in this case n has
230
456
* been set to num above
232
if (*parse_extent == '\0')
233
assign_number(&NF_node->var_value, (AWKNUM)n);
458
if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
235
461
return &fields_arr[num];
239
* this is called both from get_field() and from do_split()
242
parse_fields(up_to, buf, len, fs, set, n)
243
int up_to; /* parse only up to this field number */
244
char **buf; /* on input: string to parse; on output: point to start next */
465
set_element(num, s, len, n)
247
void (*set) (); /* routine to set the value of the parsed field */
251
register char *field;
253
register char *end = s + len;
254
int NF = parse_high_water;
260
if (*fs && *(fs + 1) != '\0') { /* fs is a regexp */
261
struct re_registers reregs;
264
if (rs == 0 && STREQ(FS_node->var_value->stptr, " ")) {
265
while ((*scan == '\n' || *scan == ' ' || *scan == '\t')
271
&& re_split(scan, (int)(end - scan), fs, &reregs) != -1
273
if (reregs.end[0] == 0) { /* null match */
276
(*set)(++NF, s, scan - s, n);
282
(*set)(++NF, s, scan - s + reregs.start[0], n);
283
scan += reregs.end[0];
286
if (NF != up_to && scan <= end) {
287
if (!(rs == 0 && scan == end)) {
288
(*set)(++NF, scan, (int)(end - scan), n);
295
for (scan = s; scan < end && NF < up_to; scan++) {
297
* special case: fs is single space, strip leading
301
while ((*scan == ' ' || *scan == '\t') && scan < end)
308
while (*scan != ' ' && *scan != '\t' && scan < end)
311
while (*scan != *fs && scan < end)
313
if (rs && scan == end-1 && *scan == *fs) {
314
(*set)(++NF, field, (int)(scan - field), n);
318
(*set)(++NF, field, (int)(scan - field), n);
327
re_split(buf, len, fs, reregsp)
330
struct re_registers *reregsp;
332
typedef struct re_pattern_buffer RPAT;
334
static char *last_fs = NULL;
336
if ((last_fs != NULL && !STREQ(fs, last_fs))
337
|| (rp && ! strict && ((IGNORECASE_node->var_value->numbr != 0)
338
^ (rp->translate != NULL))))
340
/* fs has changed or IGNORECASE has changed */
347
if (last_fs == NULL) { /* first time */
348
emalloc(rp, RPAT *, sizeof(RPAT), "re_split");
349
memset((char *) rp, 0, sizeof(RPAT));
350
emalloc(rp->buffer, char *, 8, "re_split");
352
emalloc(rp->fastmap, char *, 256, "re_split");
353
emalloc(last_fs, char *, strlen(fs) + 1, "re_split");
354
(void) strcpy(last_fs, fs);
355
if (! strict && IGNORECASE_node->var_value->numbr != 0.0)
356
rp->translate = casetable;
358
rp->translate = NULL;
359
if (re_compile_pattern(fs, strlen(fs), rp) != NULL)
360
fatal("illegal regular expression for FS: `%s'", fs);
362
return re_search(rp, buf, len, 0, len, reregsp);
473
it = make_string(s, len);
474
it->flags |= MAYBE_NUM;
475
*assoc_lookup(n, tmp_number((AWKNUM) (num))) = it;
370
register char *splitc;
482
NODE *t1, *t2, *t3, *tmp;
483
register char *splitc = "";
374
if (a_get_three(tree, &t1, &t2, &t3) < 3)
377
splitc = force_string(t3)->stptr;
488
t1 = tree_eval(tree->lnode);
489
t2 = tree->rnode->lnode;
490
t3 = tree->rnode->rnode->lnode;
492
(void) force_string(t1);
380
494
if (t2->type == Node_param_list)
381
n = stack_ptr[t2->param_cnt];
382
if (n->type != Node_var && n->type != Node_var_array)
495
t2 = stack_ptr[t2->param_cnt];
496
if (t2->type != Node_var && t2->type != Node_var_array)
383
497
fatal("second argument of split is not a variable");
386
tree = force_string(t1);
389
return tmp_number((AWKNUM)
390
parse_fields(HUGE, &s, tree->stlen, splitc, set_element, n));
500
if (t3->re_flags & FS_DFLT) {
501
parseit = parse_field;
505
tmp = force_string(tree_eval(t3->re_exp));
506
if (tmp->stlen == 1) {
507
if (tmp->stptr[0] == ' ') {
508
parseit = def_parse_field;
510
parseit = sc_parse_field;
514
parseit = re_parse_field;
521
tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, t1->stlen,
522
splitc, rp, set_element, t2));
396
530
register NODE *tmp;
397
531
static char buf[10];
537
parse_field = def_parse_field;
399
538
tmp = force_string(FS_node->var_value);
541
parse_field = re_parse_field;
401
543
if (tmp->stlen == 1) {
402
544
if (tmp->stptr[0] == ' ')
403
545
(void) strcpy(buf, "[ \n]+");
546
else if (tmp->stptr[0] != '\n')
405
547
sprintf(buf, "[%c\n]", tmp->stptr[0]);
549
parse_field = sc_parse_field;
406
552
} else if (tmp->stlen == 0) {
555
parse_field = sc_parse_field;
417
set_element(num, s, len, n)
423
*assoc_lookup(n, tmp_number((AWKNUM) (num))) = make_string(s, len);
560
parse_field = re_parse_field;
561
else if (*FS != ' ' && tmp->stlen == 1)
562
parse_field = sc_parse_field;
564
if (parse_field == re_parse_field) {
565
tmp = tmp_string(FS, strlen(FS));
566
FS_regexp = make_regexp(tmp, 0, 1);
576
(void) force_string(RS_node->var_value);
577
RS = RS_node->var_value->stptr;
587
static int fw_alloc = 1;
588
static int warned = 0;
590
if (do_lint && ! warned) {
592
warning("use of FIELDWIDTHS is a gawk extension");
594
if (strict) /* quick and dirty, does the trick */
597
parse_field = fw_parse_field;
598
scan = force_string(FIELDWIDTHS_node->var_value)->stptr;
600
if (FIELDWIDTHS == NULL)
601
emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
606
erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
608
FIELDWIDTHS[i] = (int) strtol(scan, &end, 10);