~ubuntu-branches/ubuntu/lucid/9base/lucid

« back to all changes in this revision

Viewing changes to awk/lex.c

  • Committer: Bazaar Package Importer
  • Author(s): Daniel Baumann
  • Date: 2006-01-25 15:33:00 UTC
  • Revision ID: james.westby@ubuntu.com-20060125153300-6hh4p9wx8iqqply5
Tags: upstream-2
Import upstream version 2

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/****************************************************************
 
2
Copyright (C) Lucent Technologies 1997
 
3
All Rights Reserved
 
4
 
 
5
Permission to use, copy, modify, and distribute this software and
 
6
its documentation for any purpose and without fee is hereby
 
7
granted, provided that the above copyright notice appear in all
 
8
copies and that both that the copyright notice and this
 
9
permission notice and warranty disclaimer appear in supporting
 
10
documentation, and that the name Lucent Technologies or any of
 
11
its entities not be used in advertising or publicity pertaining
 
12
to distribution of the software without specific, written prior
 
13
permission.
 
14
 
 
15
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 
16
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
 
17
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
 
18
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 
19
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
 
20
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 
21
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 
22
THIS SOFTWARE.
 
23
****************************************************************/
 
24
 
 
25
#include <stdio.h>
 
26
#include <stdlib.h>
 
27
#include <string.h>
 
28
#include <ctype.h>
 
29
#include "awk.h"
 
30
#include "y.tab.h"
 
31
 
 
32
/* State owned by other translation units. */
extern YYSTYPE	yylval;		/* value slot filled in alongside each returned token */
extern int	infunc;		/* tested by word(): nonzero is treated as "inside a function" */

/* Lexer bookkeeping. */
int	lineno	= 1;		/* bumped by input() on '\n', undone by unput() */
int	bracecnt = 0;		/* '{' seen minus '}' seen (see yylex) */
int	brackcnt  = 0;		/* '[' seen minus ']' seen (see yylex) */
int	parencnt = 0;		/* '(' seen minus ')' seen (see yylex) */
 
39
 
 
40
/*
 * One entry of the reserved-word table.
 * word is const because every entry points at a string literal, which
 * must never be written through.
 */
typedef struct Keyword {
	const char	*word;	/* spelling of the reserved word */
	int	sub;		/* value stored in yylval.i when the word is recognised */
	int	type;		/* token code returned to the parser */
} Keyword;
 
45
 
 
46
Keyword keywords[] ={   /* keep sorted: binary searched */
 
47
        { "BEGIN",      XBEGIN,         XBEGIN },
 
48
        { "END",        XEND,           XEND },
 
49
        { "NF",         VARNF,          VARNF },
 
50
        { "atan2",      FATAN,          BLTIN },
 
51
        { "break",      BREAK,          BREAK },
 
52
        { "close",      CLOSE,          CLOSE },
 
53
        { "continue",   CONTINUE,       CONTINUE },
 
54
        { "cos",        FCOS,           BLTIN },
 
55
        { "delete",     DELETE,         DELETE },
 
56
        { "do",         DO,             DO },
 
57
        { "else",       ELSE,           ELSE },
 
58
        { "exit",       EXIT,           EXIT },
 
59
        { "exp",        FEXP,           BLTIN },
 
60
        { "fflush",     FFLUSH,         BLTIN },
 
61
        { "for",        FOR,            FOR },
 
62
        { "func",       FUNC,           FUNC },
 
63
        { "function",   FUNC,           FUNC },
 
64
        { "getline",    GETLINE,        GETLINE },
 
65
        { "gsub",       GSUB,           GSUB },
 
66
        { "if",         IF,             IF },
 
67
        { "in",         IN,             IN },
 
68
        { "index",      INDEX,          INDEX },
 
69
        { "int",        FINT,           BLTIN },
 
70
        { "length",     FLENGTH,        BLTIN },
 
71
        { "log",        FLOG,           BLTIN },
 
72
        { "match",      MATCHFCN,       MATCHFCN },
 
73
        { "next",       NEXT,           NEXT },
 
74
        { "nextfile",   NEXTFILE,       NEXTFILE },
 
75
        { "print",      PRINT,          PRINT },
 
76
        { "printf",     PRINTF,         PRINTF },
 
77
        { "rand",       FRAND,          BLTIN },
 
78
        { "return",     RETURN,         RETURN },
 
79
        { "sin",        FSIN,           BLTIN },
 
80
        { "split",      SPLIT,          SPLIT },
 
81
        { "sprintf",    SPRINTF,        SPRINTF },
 
82
        { "sqrt",       FSQRT,          BLTIN },
 
83
        { "srand",      FSRAND,         BLTIN },
 
84
        { "sub",        SUB,            SUB },
 
85
        { "substr",     SUBSTR,         SUBSTR },
 
86
        { "system",     FSYSTEM,        BLTIN },
 
87
        { "tolower",    FTOLOWER,       BLTIN },
 
88
        { "toupper",    FTOUPPER,       BLTIN },
 
89
        { "while",      WHILE,          WHILE },
 
90
        { "utf",        FUTF,           BLTIN },
 
91
};
 
92
 
 
93
/*
 * RET(x): return token x from the lexer.
 * DEBUG is defined unconditionally here, so the tracing form is always the
 * one compiled in: when dbg is nonzero it prints the token's name (via
 * tokname) to stdout before returning.  Note that the tracing form names x
 * twice, so callers must pass an expression without side effects.
 */
#define DEBUG
#if defined(DEBUG)
#define RET(x)	{ if(dbg)printf("lex %s\n", tokname(x)); return(x); }
#else
#define RET(x)	return(x)
#endif
 
99
 
 
100
/*
 * peek: report the next input character without consuming it.
 * The character is read with input() and pushed straight back with unput(),
 * so the following input() call yields the same character again.
 */
int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
 
106
 
 
107
/*
 * gettok: read the next raw token into the caller's buffer.
 *
 * pbuf/psz point at the buffer and its size; both are updated on return if
 * the buffer had to be grown with adjbuf().
 *
 * Returns 0 at end of input; for a character that cannot start a name or a
 * number, that character (the buffer then holds it as a one-character
 * string); otherwise the first character of the name or number collected in
 * the buffer.
 *
 * Fixes over the previous version:
 *  - the buffer is now always NUL-terminated, also when the input ends in
 *    the middle of a name (the old loop only terminated it when it saw a
 *    following non-name character);
 *  - room for the terminator is ensured before it is written, so input that
 *    ends exactly at buffer capacity no longer writes one byte too far;
 *  - <ctype.h> functions are called with unsigned char values.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	/* assumes the caller's buffer holds at least two bytes */
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum((unsigned char)c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha((unsigned char)c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			if (bp-buf >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum((unsigned char)c) || c == '_')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		/* terminate on every exit path, including end of input */
		if (bp-buf >= sz)
			if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
				FATAL( "out of space for name %.10s...", buf );
		*bp = 0;
	} else {	/* it's a number */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			if (bp-buf >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit((unsigned char)c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		if (bp-buf >= sz)
			if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
				FATAL( "out of space for number %.10s...", buf );
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		unputstr(rem);		/* put rest back for later */
		/*
		 * NOTE(review): when strtod() consumes nothing (e.g. a lone '.'),
		 * rem == buf, so the line below empties the buffer and the
		 * function returns 0, which callers read as end of input.
		 * Left unchanged here because fixing it needs a matching change
		 * in yylex().
		 */
		rem[0] = 0;
	}
	*pbuf = buf;
	*psz = sz;
	return buf[0];
}
 
160
 
 
161
/* Helpers defined further down in this file. */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* One-shot flags consulted (and cleared) at the top of yylex(). */
int	sc	= 0;	/* 1 => return a } right now */
int	reg	= 0;	/* 1 => return a REGEXPR now */
 
166
 
 
167
int yylex(void)
 
168
{
 
169
        int c;
 
170
        static char *buf = 0;
 
171
        static int bufsize = 500;
 
172
 
 
173
        if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
 
174
                FATAL( "out of space in yylex" );
 
175
        if (sc) {
 
176
                sc = 0;
 
177
                RET('}');
 
178
        }
 
179
        if (reg) {
 
180
                reg = 0;
 
181
                return regexpr();
 
182
        }
 
183
        for (;;) {
 
184
                c = gettok(&buf, &bufsize);
 
185
                if (c == 0)
 
186
                        return 0;
 
187
                if (isalpha(c) || c == '_')
 
188
                        return word(buf);
 
189
                if (isdigit(c) || c == '.') {
 
190
                        yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
 
191
                        /* should this also have STR set? */
 
192
                        RET(NUMBER);
 
193
                }
 
194
        
 
195
                yylval.i = c;
 
196
                switch (c) {
 
197
                case '\n':      /* {EOL} */
 
198
                        RET(NL);
 
199
                case '\r':      /* assume \n is coming */
 
200
                case ' ':       /* {WS}+ */
 
201
                case '\t':
 
202
                        break;
 
203
                case '#':       /* #.* strip comments */
 
204
                        while ((c = input()) != '\n' && c != 0)
 
205
                                ;
 
206
                        unput(c);
 
207
                        break;
 
208
                case ';':
 
209
                        RET(';');
 
210
                case '\\':
 
211
                        if (peek() == '\n') {
 
212
                                input();
 
213
                        } else if (peek() == '\r') {
 
214
                                input(); input();       /* \n */
 
215
                                lineno++;
 
216
                        } else {
 
217
                                RET(c);
 
218
                        }
 
219
                        break;
 
220
                case '&':
 
221
                        if (peek() == '&') {
 
222
                                input(); RET(AND);
 
223
                        } else 
 
224
                                RET('&');
 
225
                case '|':
 
226
                        if (peek() == '|') {
 
227
                                input(); RET(BOR);
 
228
                        } else
 
229
                                RET('|');
 
230
                case '!':
 
231
                        if (peek() == '=') {
 
232
                                input(); yylval.i = NE; RET(NE);
 
233
                        } else if (peek() == '~') {
 
234
                                input(); yylval.i = NOTMATCH; RET(MATCHOP);
 
235
                        } else
 
236
                                RET(NOT);
 
237
                case '~':
 
238
                        yylval.i = MATCH;
 
239
                        RET(MATCHOP);
 
240
                case '<':
 
241
                        if (peek() == '=') {
 
242
                                input(); yylval.i = LE; RET(LE);
 
243
                        } else {
 
244
                                yylval.i = LT; RET(LT);
 
245
                        }
 
246
                case '=':
 
247
                        if (peek() == '=') {
 
248
                                input(); yylval.i = EQ; RET(EQ);
 
249
                        } else {
 
250
                                yylval.i = ASSIGN; RET(ASGNOP);
 
251
                        }
 
252
                case '>':
 
253
                        if (peek() == '=') {
 
254
                                input(); yylval.i = GE; RET(GE);
 
255
                        } else if (peek() == '>') {
 
256
                                input(); yylval.i = APPEND; RET(APPEND);
 
257
                        } else {
 
258
                                yylval.i = GT; RET(GT);
 
259
                        }
 
260
                case '+':
 
261
                        if (peek() == '+') {
 
262
                                input(); yylval.i = INCR; RET(INCR);
 
263
                        } else if (peek() == '=') {
 
264
                                input(); yylval.i = ADDEQ; RET(ASGNOP);
 
265
                        } else
 
266
                                RET('+');
 
267
                case '-':
 
268
                        if (peek() == '-') {
 
269
                                input(); yylval.i = DECR; RET(DECR);
 
270
                        } else if (peek() == '=') {
 
271
                                input(); yylval.i = SUBEQ; RET(ASGNOP);
 
272
                        } else
 
273
                                RET('-');
 
274
                case '*':
 
275
                        if (peek() == '=') {    /* *= */
 
276
                                input(); yylval.i = MULTEQ; RET(ASGNOP);
 
277
                        } else if (peek() == '*') {     /* ** or **= */
 
278
                                input();        /* eat 2nd * */
 
279
                                if (peek() == '=') {
 
280
                                        input(); yylval.i = POWEQ; RET(ASGNOP);
 
281
                                } else {
 
282
                                        RET(POWER);
 
283
                                }
 
284
                        } else
 
285
                                RET('*');
 
286
                case '/':
 
287
                        RET('/');
 
288
                case '%':
 
289
                        if (peek() == '=') {
 
290
                                input(); yylval.i = MODEQ; RET(ASGNOP);
 
291
                        } else
 
292
                                RET('%');
 
293
                case '^':
 
294
                        if (peek() == '=') {
 
295
                                input(); yylval.i = POWEQ; RET(ASGNOP);
 
296
                        } else
 
297
                                RET(POWER);
 
298
        
 
299
                case '$':
 
300
                        /* BUG: awkward, if not wrong */
 
301
                        c = gettok(&buf, &bufsize);
 
302
                        if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
 
303
                                unputstr(buf);
 
304
                                RET(INDIRECT);
 
305
                        } else if (isalpha(c)) {
 
306
                                if (strcmp(buf, "NF") == 0) {   /* very special */
 
307
                                        unputstr("(NF)");
 
308
                                        RET(INDIRECT);
 
309
                                }
 
310
                                yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
 
311
                                RET(IVAR);
 
312
                        } else {
 
313
                                unputstr(buf);
 
314
                                RET(INDIRECT);
 
315
                        }
 
316
        
 
317
                case '}':
 
318
                        if (--bracecnt < 0)
 
319
                                SYNTAX( "extra }" );
 
320
                        sc = 1;
 
321
                        RET(';');
 
322
                case ']':
 
323
                        if (--brackcnt < 0)
 
324
                                SYNTAX( "extra ]" );
 
325
                        RET(']');
 
326
                case ')':
 
327
                        if (--parencnt < 0)
 
328
                                SYNTAX( "extra )" );
 
329
                        RET(')');
 
330
                case '{':
 
331
                        bracecnt++;
 
332
                        RET('{');
 
333
                case '[':
 
334
                        brackcnt++;
 
335
                        RET('[');
 
336
                case '(':
 
337
                        parencnt++;
 
338
                        RET('(');
 
339
        
 
340
                case '"':
 
341
                        return string();        /* BUG: should be like tran.c ? */
 
342
        
 
343
                default:
 
344
                        RET(c);
 
345
                }
 
346
        }
 
347
}
 
348
 
 
349
int string(void)
 
350
{
 
351
        int c, n;
 
352
        char *s, *bp;
 
353
        static char *buf = 0;
 
354
        static int bufsz = 500;
 
355
 
 
356
        if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
 
357
                FATAL("out of space for strings");
 
358
        for (bp = buf; (c = input()) != '"'; ) {
 
359
                if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
 
360
                        FATAL("out of space for string %.10s...", buf);
 
361
                switch (c) {
 
362
                case '\n':
 
363
                case '\r':
 
364
                case 0:
 
365
                        SYNTAX( "non-terminated string %.10s...", buf );
 
366
                        lineno++;
 
367
                        break;
 
368
                case '\\':
 
369
                        c = input();
 
370
                        switch (c) {
 
371
                        case '"': *bp++ = '"'; break;
 
372
                        case 'n': *bp++ = '\n'; break;  
 
373
                        case 't': *bp++ = '\t'; break;
 
374
                        case 'f': *bp++ = '\f'; break;
 
375
                        case 'r': *bp++ = '\r'; break;
 
376
                        case 'b': *bp++ = '\b'; break;
 
377
                        case 'v': *bp++ = '\v'; break;
 
378
                        case 'a': *bp++ = '\007'; break;
 
379
                        case '\\': *bp++ = '\\'; break;
 
380
 
 
381
                        case '0': case '1': case '2': /* octal: \d \dd \ddd */
 
382
                        case '3': case '4': case '5': case '6': case '7':
 
383
                                n = c - '0';
 
384
                                if ((c = peek()) >= '0' && c < '8') {
 
385
                                        n = 8 * n + input() - '0';
 
386
                                        if ((c = peek()) >= '0' && c < '8')
 
387
                                                n = 8 * n + input() - '0';
 
388
                                }
 
389
                                *bp++ = n;
 
390
                                break;
 
391
 
 
392
                        case 'x':       /* hex  \x0-9a-fA-F + */
 
393
                            {   char xbuf[100], *px;
 
394
                                for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
 
395
                                        if (isdigit(c)
 
396
                                         || (c >= 'a' && c <= 'f')
 
397
                                         || (c >= 'A' && c <= 'F'))
 
398
                                                *px++ = c;
 
399
                                        else
 
400
                                                break;
 
401
                                }
 
402
                                *px = 0;
 
403
                                unput(c);
 
404
                                sscanf(xbuf, "%x", &n);
 
405
                                *bp++ = n;
 
406
                                break;
 
407
                            }
 
408
 
 
409
                        default: 
 
410
                                *bp++ = c;
 
411
                                break;
 
412
                        }
 
413
                        break;
 
414
                default:
 
415
                        *bp++ = c;
 
416
                        break;
 
417
                }
 
418
        }
 
419
        *bp = 0; 
 
420
        s = tostring(buf);
 
421
        *bp++ = ' '; *bp++ = 0;
 
422
        yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
 
423
        RET(STRING);
 
424
}
 
425
 
 
426
 
 
427
/*
 * binsearch: look up w among the n entries of kp, which must be sorted by
 * strcmp() order of their word fields.
 * Returns the index of the matching entry, or -1 if there is none.
 */
int binsearch(char *w, Keyword *kp, int n)
{
	int lo = 0;
	int hi = n - 1;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		int cmp = strcmp(w, kp[mid].word);

		if (cmp == 0)
			return mid;
		if (cmp < 0)
			hi = mid - 1;	/* w sorts before the probe */
		else
			lo = mid + 1;	/* w sorts after the probe */
	}
	return -1;
}
 
444
 
 
445
int word(char *w) 
 
446
{
 
447
        Keyword *kp;
 
448
        int c, n;
 
449
 
 
450
        n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
 
451
        kp = keywords + n;
 
452
        if (n != -1) {  /* found in table */
 
453
                yylval.i = kp->sub;
 
454
                switch (kp->type) {     /* special handling */
 
455
                case FSYSTEM:
 
456
                        if (safe)
 
457
                                SYNTAX( "system is unsafe" );
 
458
                        RET(kp->type);
 
459
                case FUNC:
 
460
                        if (infunc)
 
461
                                SYNTAX( "illegal nested function" );
 
462
                        RET(kp->type);
 
463
                case RETURN:
 
464
                        if (!infunc)
 
465
                                SYNTAX( "return not in function" );
 
466
                        RET(kp->type);
 
467
                case VARNF:
 
468
                        yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
 
469
                        RET(VARNF);
 
470
                default:
 
471
                        RET(kp->type);
 
472
                }
 
473
        }
 
474
        c = peek();     /* look for '(' */
 
475
        if (c != '(' && infunc && (n=isarg(w)) >= 0) {
 
476
                yylval.i = n;
 
477
                RET(ARG);
 
478
        } else {
 
479
                yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
 
480
                if (c == '(') {
 
481
                        RET(CALL);
 
482
                } else {
 
483
                        RET(VAR);
 
484
                }
 
485
        }
 
486
}
 
487
 
 
488
void startreg(void)     /* next call to yyles will return a regular expression */
 
489
{
 
490
        reg = 1;
 
491
}
 
492
 
 
493
int regexpr(void)
 
494
{
 
495
        int c;
 
496
        static char *buf = 0;
 
497
        static int bufsz = 500;
 
498
        char *bp;
 
499
 
 
500
        if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
 
501
                FATAL("out of space for rex expr");
 
502
        bp = buf;
 
503
        for ( ; (c = input()) != '/' && c != 0; ) {
 
504
                if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0))
 
505
                        FATAL("out of space for reg expr %.10s...", buf);
 
506
                if (c == '\n') {
 
507
                        SYNTAX( "newline in regular expression %.10s...", buf ); 
 
508
                        unput('\n');
 
509
                        break;
 
510
                } else if (c == '\\') {
 
511
                        *bp++ = '\\'; 
 
512
                        *bp++ = input();
 
513
                } else {
 
514
                        *bp++ = c;
 
515
                }
 
516
        }
 
517
        *bp = 0;
 
518
        yylval.s = tostring(buf);
 
519
        unput('/');
 
520
        RET(REGEXPR);
 
521
}
 
522
 
 
523
/* low-level lexical stuff, sort of inherited from lex */
 
524
 
 
525
/* Circular record of the characters handed out by input(). */
char	ebuf[300];
char	*ep = ebuf;		/* next slot in ebuf; input() wraps it, unput() steps it back */

/* Characters pushed back by unput(); input() drains it last-in, first-out. */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the most recently pushed character */

FILE	*yyin = 0;
 
530
 
 
531
int input(void) /* get next lexical input character */
 
532
{
 
533
        int c;
 
534
        extern char *lexprog;
 
535
 
 
536
        if (yysptr > yysbuf)
 
537
                c = *--yysptr;
 
538
        else if (lexprog != NULL) {     /* awk '...' */
 
539
                if ((c = *lexprog) != 0)
 
540
                        lexprog++;
 
541
        } else                          /* awk -f ... */
 
542
                c = pgetc();
 
543
        if (c == '\n')
 
544
                lineno++;
 
545
        else if (c == EOF)
 
546
                c = 0;
 
547
        if (ep >= ebuf + sizeof ebuf)
 
548
                ep = ebuf;
 
549
        return *ep++ = c;
 
550
}
 
551
 
 
552
void unput(int c)       /* put lexical character back on input */
 
553
{
 
554
        if (c == '\n')
 
555
                lineno--;
 
556
        if (yysptr >= yysbuf + sizeof(yysbuf))
 
557
                FATAL("pushed back too much: %.20s...", yysbuf);
 
558
        *yysptr++ = c;
 
559
        if (--ep < ebuf)
 
560
                ep = ebuf + sizeof(ebuf) - 1;
 
561
}
 
562
 
 
563
/*
 * unputstr: push the whole string s back onto the input.
 * Characters are pushed last-to-first so that input() hands them out again
 * in their original order.  An empty string pushes nothing.
 */
void unputstr(char *s)	/* put a string back on input */
{
	char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}