684
728
*do_split(), *do_system(), *do_int(), *do_close(),
685
729
*do_atan2(), *do_sin(), *do_cos(), *do_rand(),
686
730
*do_srand(), *do_match(), *do_tolower(), *do_toupper(),
687
*do_sub(), *do_gsub();
689
/* Special functions for debugging */
691
NODE *do_prvars(), *do_bp();
731
*do_sub(), *do_gsub(), *do_strftime(), *do_systime();
694
733
/* Tokentab is sorted in ascending ASCII order, so it can be binary searched. */
696
735
static struct token tokentab[] = {
697
{ "BEGIN", Node_illegal, LEX_BEGIN, 0, 0 },
698
{ "END", Node_illegal, LEX_END, 0, 0 },
699
{ "atan2", Node_builtin, LEX_BUILTIN, 0, do_atan2 },
701
{ "bp", Node_builtin, LEX_BUILTIN, 0, do_bp },
703
{ "break", Node_K_break, LEX_BREAK, 0, 0 },
704
{ "close", Node_builtin, LEX_BUILTIN, 0, do_close },
705
{ "continue", Node_K_continue, LEX_CONTINUE, 0, 0 },
706
{ "cos", Node_builtin, LEX_BUILTIN, 0, do_cos },
707
{ "delete", Node_K_delete, LEX_DELETE, 0, 0 },
708
{ "do", Node_K_do, LEX_DO, 0, 0 },
709
{ "else", Node_illegal, LEX_ELSE, 0, 0 },
710
{ "exit", Node_K_exit, LEX_EXIT, 0, 0 },
711
{ "exp", Node_builtin, LEX_BUILTIN, 0, do_exp },
712
{ "for", Node_K_for, LEX_FOR, 0, 0 },
713
{ "func", Node_K_function, LEX_FUNCTION, 0, 0 },
714
{ "function", Node_K_function, LEX_FUNCTION, 0, 0 },
715
{ "getline", Node_K_getline, LEX_GETLINE, 0, 0 },
716
{ "gsub", Node_builtin, LEX_BUILTIN, 0, do_gsub },
717
{ "if", Node_K_if, LEX_IF, 0, 0 },
718
{ "in", Node_illegal, LEX_IN, 0, 0 },
719
{ "index", Node_builtin, LEX_BUILTIN, 0, do_index },
720
{ "int", Node_builtin, LEX_BUILTIN, 0, do_int },
721
{ "length", Node_builtin, LEX_LENGTH, 0, do_length },
722
{ "log", Node_builtin, LEX_BUILTIN, 0, do_log },
723
{ "match", Node_builtin, LEX_BUILTIN, 0, do_match },
724
{ "next", Node_K_next, LEX_NEXT, 0, 0 },
725
{ "print", Node_K_print, LEX_PRINT, 0, 0 },
726
{ "printf", Node_K_printf, LEX_PRINTF, 0, 0 },
728
{ "prvars", Node_builtin, LEX_BUILTIN, 0, do_prvars },
730
{ "rand", Node_builtin, LEX_BUILTIN, 0, do_rand },
731
{ "return", Node_K_return, LEX_RETURN, 0, 0 },
732
{ "sin", Node_builtin, LEX_BUILTIN, 0, do_sin },
733
{ "split", Node_builtin, LEX_BUILTIN, 0, do_split },
734
{ "sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf },
735
{ "sqrt", Node_builtin, LEX_BUILTIN, 0, do_sqrt },
736
{ "srand", Node_builtin, LEX_BUILTIN, 0, do_srand },
737
{ "sub", Node_builtin, LEX_BUILTIN, 0, do_sub },
738
{ "substr", Node_builtin, LEX_BUILTIN, 0, do_substr },
739
{ "system", Node_builtin, LEX_BUILTIN, 0, do_system },
740
{ "tolower", Node_builtin, LEX_BUILTIN, 0, do_tolower },
741
{ "toupper", Node_builtin, LEX_BUILTIN, 0, do_toupper },
742
{ "while", Node_K_while, LEX_WHILE, 0, 0 },
736
{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
737
{"END", Node_illegal, LEX_END, 0, 0},
738
{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
739
{"break", Node_K_break, LEX_BREAK, 0, 0},
740
{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close},
741
{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
742
{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
743
{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
744
{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
745
{"else", Node_illegal, LEX_ELSE, 0, 0},
746
{"exit", Node_K_exit, LEX_EXIT, 0, 0},
747
{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
748
{"for", Node_K_for, LEX_FOR, 0, 0},
749
{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
750
{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
751
{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
752
{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
753
{"if", Node_K_if, LEX_IF, 0, 0},
754
{"in", Node_illegal, LEX_IN, 0, 0},
755
{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
756
{"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
757
{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
758
{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
759
{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match},
760
{"next", Node_K_next, LEX_NEXT, 0, 0},
761
{"print", Node_K_print, LEX_PRINT, 0, 0},
762
{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
763
{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
764
{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
765
{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
766
{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
767
{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
768
{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
769
{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
770
{"strftime", Node_builtin, LEX_BUILTIN, GAWK|A(1)|A(2), do_strftime},
771
{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
772
{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
773
{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
774
{"systime", Node_builtin, LEX_BUILTIN, GAWK|A(0), do_systime},
775
{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
776
{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
777
{"while", Node_K_while, LEX_WHILE, 0, 0},
745
static char *token_start;
749
782
yyerror(va_alist)
758
791
/* Find the current line in the input file */
760
beg = "(END OF FILE)";
763
if (*lexptr == '\n' && lexptr != lexptr_begin)
765
for (beg = lexptr; beg != lexptr_begin && *beg != '\n'; --beg)
794
for (beg = lexeme; beg != lexptr_begin && *beg != '\n'; --beg)
767
800
/* NL isn't guaranteed */
768
for (ptr = lexptr; *ptr && *ptr != '\n'; ptr++)
770
if (beg != lexptr_begin)
773
msg("syntax error near line %d:\n%.*s", lineno, ptr - beg, beg);
775
while (scan < token_start)
802
while (ptr < lexend && *ptr && *ptr != '\n')
805
thisline = "(END OF FILE)";
809
fprintf(stderr, "%.*s\n", (int) (ptr - thisline), thisline);
812
while (scan < lexeme)
783
821
mesg = va_arg(args, char *);
784
822
vfprintf(stderr, mesg, args);
786
824
putc('\n', stderr);
791
* Parse a C escape sequence. STRING_PTR points to a variable containing a
792
* pointer to the string to parse. That pointer is updated past the
793
* characters we use. The value of the escape sequence is returned.
795
* A negative value means the sequence \ newline was seen, which is supposed to
796
* be equivalent to nothing at all.
798
* If \ is followed by a null character, we return a negative value and leave
799
* the string pointer pointing at the null character.
801
* If \ is followed by 000, we return 0 and leave the string pointer after the
802
* zeros. A value of 0 does not mean end of string.
806
parse_escape(string_ptr)
809
register int c = *(*string_ptr)++;
843
while (++count < 3) {
844
if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
856
if (isxdigit((c = *(*string_ptr)++))) {
875
* Read the input and turn it into tokens. Input is now read from a file
876
* instead of from malloc'ed memory. The main program takes a program
877
* passed as a command line argument and writes it to a temp file. Otherwise
878
* the file name is made available in an external variable.
831
static int samefile = 0;
832
static int nextfile = 0;
833
static char *buf = NULL;
838
static int did_newline = 0;
839
# define SLOP 128 /* enough space to hold most source lines */
843
len = strlen(cmdline_src);
847
lexptr = lexptr_begin = cmdline_src;
848
lexend = lexptr + len;
849
} else if (!did_newline && *(lexptr-1) != '\n') {
851
* The following goop is to ensure that the source
852
* ends with a newline and that the entire current
853
* line is available for error messages.
858
offset = lexptr - lexeme;
859
for (scan = lexeme; scan > lexptr_begin; scan--)
865
emalloc(buf, char *, len+1, "get_src_buf");
866
memcpy(buf, scan, len);
870
lexeme = lexptr - offset;
874
lexptr = lexptr_begin = NULL;
878
source = srcfiles[nextfile];
879
if (source == NULL) {
882
return lexptr = lexptr_begin = NULL;
884
fd = pathopen(source);
886
fatal("can't open source file \"%s\" for reading (%s)",
887
source, strerror(errno));
888
len = optimal_bufsize(fd);
891
emalloc(buf, char *, len + SLOP, "get_src_buf");
892
lexptr_begin = buf + SLOP;
897
* Here, we retain the current source line (up to length SLOP)
898
* in the beginning of the buffer that was overallocated above
903
offset = lexptr - lexeme;
904
for (scan = lexeme; scan > lexptr_begin; scan--)
909
linelen = lexptr - scan;
912
thisline = buf + SLOP - linelen;
913
memcpy(thisline, scan, linelen);
914
lexeme = buf + SLOP - offset;
915
lexptr_begin = thisline;
917
n = read(fd, buf + SLOP, len);
919
fatal("can't read sourcefile \"%s\" (%s)",
920
source, strerror(errno));
924
return get_src_buf();
931
/*
 * tokadd(x): store x at *token and advance token by one.  If token has
 * then reached tokend, the macro evaluates to the result of tokexpand();
 * otherwise it evaluates to token.  The argument x is evaluated exactly
 * once.  The names token, tokend and tokexpand are resolved at the
 * expansion site, so they must be in scope wherever the macro is used.
 */
#define tokadd(x) (*token++ = (x), token == tokend ? tokexpand() : token)
936
static int toksize = 60;
939
tokoffset = token - tokstart;
942
erealloc(tokstart, char *, toksize, "tokexpand");
944
emalloc(tokstart, char *, toksize, "tokexpand");
945
tokend = tokstart + toksize;
946
token = tokstart + tokoffset;
953
if (lexptr && lexptr < lexend)
960
#define nextc() ((lexptr && lexptr < lexend) ? \
962
(get_src_buf() ? *lexptr++ : '\0') \
965
/*
 * pushback(): step lexptr back by one character.  Nothing happens when
 * lexptr is null or is not past lexptr_begin.  The expression yields the
 * value lexptr held before any decrement.
 */
#define pushback() ((lexptr != NULL && lexptr > lexptr_begin) ? lexptr-- : lexptr)
968
* Read the input and turn it into tokens.
885
register int namelen;
886
register char *tokstart;
888
static did_newline = 0; /* the grammar insists that actions end
889
* with newlines. This was easier than
890
* hacking the grammar. */
891
975
int seen_e = 0; /* These are for numbers */
892
976
int seen_point = 0;
894
extern char **sourcefile;
895
extern int tempsource, numfiles;
896
static int file_opened = 0;
898
static char cbuf[BUFSIZ];
977
int esc_seen; /* for literal strings */
899
978
int low, mid, high;
901
extern int debugging;
910
for (i = 0; i <= numfiles; i++)
911
fprintf (stderr, "sourcefile[%d] = %s\n", i,
916
if ((fin = pathopen (sourcefile[++curinfile])) == NULL)
917
fatal("cannot open `%s' for reading (%s)",
918
sourcefile[curinfile],
920
*(lexptr = cbuf) = '\0';
922
* immediately unlink the tempfile so that it will
923
* go away cleanly if we bomb.
925
if (tempsource && curinfile == 0)
926
(void) unlink (sourcefile[curinfile]);
931
if (fgets (cbuf, sizeof cbuf, fin) == NULL) {
933
fclose (fin); /* be neat and clean */
934
if (curinfile < numfiles)
938
lexptr = lexptr_begin = cbuf;
979
static int did_newline = 0;
940
987
if (want_regexp) {
941
988
int in_brack = 0;
944
token_start = tokstart = lexptr;
945
while (c = *lexptr++) {
992
while (c = nextc()) {
954
if (*lexptr++ == '\0') {
955
yyerror("unterminated regexp ends with \\");
957
} else if (lexptr[-1] == '\n')
1001
if ((c = nextc()) == '\0') {
1002
yyerror("unterminated regexp ends with \\ at end of file");
1003
} else if (c == '\n') {
960
1009
case '/': /* end of the regexp */
965
1015
yylval.sval = tokstart;
1019
yyerror("unterminated regexp");
970
lexptr--; /* so error messages work */
971
yyerror("unterminated regexp");
1021
yyerror("unterminated regexp at end of file");
977
if (*lexptr == '\n') {
983
while (*lexptr == ' ' || *lexptr == '\t')
986
token_start = tokstart = lexptr;
988
switch (c = *lexptr++) {
1027
while ((c = nextc()) == ' ' || c == '\t')
1033
yylval.nodetypeval = Node_illegal;
996
1043
case '#': /* it's a comment */
997
while (*lexptr != '\n' && *lexptr != '\0')
1044
while ((c = nextc()) != '\n') {
1002
if (*lexptr == '\n') {
1052
#ifdef RELAXED_CONTINUATION
1053
if (!strict) { /* strip trailing white-space and/or comment */
1054
while ((c = nextc()) == ' ' || c == '\t') continue;
1056
while ((c = nextc()) != '\n') if (!c) break;
1059
#endif /*RELAXED_CONTINUATION*/
1060
if (nextc() == '\n') {
1064
yyerror("inappropriate use of backslash");
1018
* set node type to ILLEGAL because the action should set it
1019
* to the right thing
1021
yylval.nodetypeval = Node_illegal;
1026
yylval.nodetypeval = Node_illegal;
1030
if (*lexptr == '=') {
1083
if ((c = nextc()) == '=') {
1031
1084
yylval.nodetypeval = Node_assign_times;
1033
1085
return ASSIGNOP;
1034
} else if (*lexptr == '*') { /* make ** and **= aliases
1036
if (lexptr[1] == '=') {
1086
} else if (do_posix) {
1089
} else if (c == '*') {
1090
/* make ** and **= aliases for ^ and ^= */
1091
static int did_warn_op = 0, did_warn_assgn = 0;
1093
if (nextc() == '=') {
1094
if (do_lint && ! did_warn_assgn) {
1096
warning("**= is not allowed by POSIX");
1037
1098
yylval.nodetypeval = Node_assign_exp;
1039
1099
return ASSIGNOP;
1041
yylval.nodetypeval = Node_illegal;
1102
if (do_lint && ! did_warn_op) {
1104
warning("** is not allowed by POSIX");
1046
yylval.nodetypeval = Node_illegal;
1050
if (want_assign && *lexptr == '=') {
1051
yylval.nodetypeval = Node_assign_quotient;
1114
if (nextc() == '=') {
1115
yylval.nodetypeval = Node_assign_quotient;
1055
yylval.nodetypeval = Node_illegal;
1059
if (*lexptr == '=') {
1123
if (nextc() == '=') {
1060
1124
yylval.nodetypeval = Node_assign_mod;
1062
1125
return ASSIGNOP;
1064
yylval.nodetypeval = Node_illegal;
1068
if (*lexptr == '=') {
1132
static int did_warn_op = 0, did_warn_assgn = 0;
1134
if (nextc() == '=') {
1136
if (do_lint && ! did_warn_assgn) {
1138
warning("operator `^=' is not supported in old awk");
1069
1140
yylval.nodetypeval = Node_assign_exp;
1071
1141
return ASSIGNOP;
1073
yylval.nodetypeval = Node_illegal;
1144
if (do_lint && ! did_warn_op) {
1146
warning("operator `^' is not supported in old awk");
1077
if (*lexptr == '=') {
1152
if ((c = nextc()) == '=') {
1078
1153
yylval.nodetypeval = Node_assign_plus;
1080
1154
return ASSIGNOP;
1082
if (*lexptr == '+') {
1083
yylval.nodetypeval = Node_illegal;
1085
1157
return INCREMENT;
1087
yylval.nodetypeval = Node_illegal;
1091
if (*lexptr == '=') {
1162
if ((c = nextc()) == '=') {
1092
1163
yylval.nodetypeval = Node_notequal;
1096
if (*lexptr == '~') {
1097
1167
yylval.nodetypeval = Node_nomatch;
1099
1169
return MATCHOP;
1101
yylval.nodetypeval = Node_illegal;
1105
if (*lexptr == '=') {
1175
if (nextc() == '=') {
1106
1176
yylval.nodetypeval = Node_leq;
1110
1179
yylval.nodetypeval = Node_less;
1114
if (*lexptr == '=') {
1184
if (nextc() == '=') {
1115
1185
yylval.nodetypeval = Node_equal;
1119
1188
yylval.nodetypeval = Node_assign;
1120
1190
return ASSIGNOP;
1123
if (*lexptr == '=') {
1193
if ((c = nextc()) == '=') {
1124
1194
yylval.nodetypeval = Node_geq;
1127
} else if (*lexptr == '>') {
1196
} else if (c == '>') {
1128
1197
yylval.nodetypeval = Node_redirect_append;
1130
1198
return APPEND_OP;
1132
1200
yylval.nodetypeval = Node_greater;
1136
1205
yylval.nodetypeval = Node_match;
1137
1207
return MATCHOP;
1298
1391
mid = (low + high) / 2;
1299
1392
c = *tokstart - tokentab[mid].operator[0];
1300
i = c ? c : strcmp (tokkey, tokentab[mid].operator);
1393
i = c ? c : strcmp (tokstart, tokentab[mid].operator);
1302
1395
if (i < 0) { /* token < mid */
1303
1396
high = mid - 1;
1304
1397
} else if (i > 0) { /* token > mid */
1307
lexptr = tokstart + namelen;
1308
if (strict && tokentab[mid].nostrict)
1401
if (tokentab[mid].flags & GAWK)
1402
warning("%s() is a gawk extension",
1403
tokentab[mid].operator);
1404
if (tokentab[mid].flags & NOT_POSIX)
1405
warning("POSIX does not allow %s",
1406
tokentab[mid].operator);
1407
if (tokentab[mid].flags & NOT_OLD)
1408
warning("%s is not supported in old awk",
1409
tokentab[mid].operator);
1411
if ((strict && (tokentab[mid].flags & GAWK))
1412
|| (do_posix && (tokentab[mid].flags & NOT_POSIX)))
1310
1414
if (tokentab[mid].class == LEX_BUILTIN
1311
|| tokentab[mid].class == LEX_LENGTH)
1312
yylval.ptrval = tokentab[mid].ptr;
1415
|| tokentab[mid].class == LEX_LENGTH
1314
1419
yylval.nodetypeval = tokentab[mid].value;
1315
1421
return tokentab[mid].class;
1319
/* It's a name. See how long it is. */
1320
1425
yylval.sval = tokkey;
1321
lexptr = tokstart + namelen;
1322
1426
if (*lexptr == '(')
1323
1427
return FUNC_CALL;
1333
/*
 * Built-in search path for awk program files.  In the visible code,
 * savepath is initialized to this value and is replaced by the AWKPATH
 * environment variable when that is set and non-empty.
 */
#define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk"
1342
static char *savepath = DEFPATH;
1343
static int first = 1;
1345
char trypath[BUFSIZ];
1348
extern int debugging;
1352
if (strcmp (file, "-") == 0)
1356
return (fopen (file, "r"));
1360
if ((awkpath = getenv ("AWKPATH")) != NULL && *awkpath)
1361
savepath = awkpath; /* used for restarting */
1365
/* some kind of path name, no search */
1367
if (strchr (file, '/') != NULL)
1369
if (strchr (file, '/') != NULL || strchr (file, '\\') != NULL
1370
|| strchr (file, ':') != NULL)
1372
return ( (fd = devopen (file, "r")) >= 0 ?
1378
/* this should take into account limits on size of trypath */
1379
for (cp = trypath; *awkpath && *awkpath != ENVSEP; )
1382
if (cp != trypath) { /* non-null element in path */
1386
strcpy (trypath, file);
1389
fprintf(stderr, "trying: %s\n", trypath);
1391
if ((fd = devopen (trypath, "r")) >= 0
1392
&& (fp = fdopen(fd, "r")) != NULL)
1395
/* no luck, keep going */
1396
if(*awkpath == ENVSEP && awkpath[1] != '\0')
1397
awkpath++; /* skip colon */
1401
* Under DOS (and probably elsewhere) you might have one of the awk
1402
* paths defined, WITHOUT the current working directory in it.
1403
* Therefore you should try to open the file in the current directory.
1405
return ( (fd = devopen(file, "r")) >= 0 ? fdopen(fd, "r") : NULL);