~vcs-imports/gawk/master

« back to all changes in this revision

Viewing changes to awkgram.y

  • Committer: Arnold D. Robbins
  • Date: 2010-07-16 11:47:02 UTC
  • Revision ID: git-v1:315bd501ca696bc3e3c938b4604d8dac7a6f512f
Tags: gawk-3.1.5
Move to gawk 3.1.5.

Show diffs side-by-side

added added

removed removed

Lines of Context:
3
3
 */
4
4
 
5
5
/* 
6
 
 * Copyright (C) 1986, 1988, 1989, 1991-2004 the Free Software Foundation, Inc.
 
6
 * Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
7
7
 * 
8
8
 * This file is part of GAWK, the GNU implementation of the
9
9
 * AWK Programming Language.
20
20
 * 
21
21
 * You should have received a copy of the GNU General Public License
22
22
 * along with this program; if not, write to the Free Software
23
 
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
 
23
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
24
24
 */
25
25
 
26
26
%{
308
308
                  NODE *n;
309
309
                  size_t len = strlen($3);
310
310
 
311
 
                  if (do_lint && ($3)[0] == '*') {
312
 
                        /* possible C comment */
313
 
                        if (($3)[len-1] == '*')
 
311
                  if (do_lint) {
 
312
                        if (len == 0)
 
313
                                lintwarn(_("regexp constant `//' looks like a C++ comment, but is not"));
 
314
                        else if (($3)[0] == '*' && ($3)[len-1] == '*')
 
315
                                /* possible C comment */
314
316
                                lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart);
315
317
                  }
316
318
                  getnode(n);
404
406
                            && strcmp($5, arr->vname) == 0) {
405
407
                                $8->type = Node_K_delete_loop;
406
408
                                $$ = $8;
 
409
                                free($3);       /* thanks to valgrind for pointing these out */
 
410
                                free($5);
407
411
                        }
408
412
                        else
409
413
                                goto regular_loop;
966
970
          }
967
971
        | '$' non_post_simp_exp
968
972
                { $$ = node($2, Node_field_spec, (NODE *) NULL); }
 
973
/*
 
974
#if 0
 
975
        | lex_builtin
 
976
                { fatal(_("can't use built-in function `%s' as a variable"), tokstart); }
 
977
#endif
 
978
*/
969
979
        ;
970
980
 
971
981
l_brace
1105
1115
/* This macro means that the last nextc() returned a single-byte character
1106
1116
   or 1st byte of a multibyte character.  */
1107
1117
#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
 
1118
#else /* MBS_SUPPORT */
 
1119
/* a dummy */
 
1120
#define nextc_is_1stbyte 1
1108
1121
#endif /* MBS_SUPPORT */
1109
1122
 
1110
1123
/* getfname --- return name of a builtin function (for pretty printing) */
1432
1445
static int
1433
1446
nextc(void)
1434
1447
{
1435
 
        if (gawk_mb_cur_max > 1)        {
 
1448
        if (gawk_mb_cur_max > 1) {
 
1449
                if (!lexptr || lexptr >= lexend) {
 
1450
                        if (! get_src_buf())
 
1451
                                return EOF;
 
1452
                }
 
1453
 
1436
1454
                /* Update the buffer index.  */
1437
1455
                cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
1438
1456
                        cur_ring_idx + 1;
1444
1462
                        mbstate_t tmp_state;
1445
1463
                        size_t mbclen;
1446
1464
 
1447
 
                        if (!lexptr || lexptr >= lexend)
1448
 
                                if (!get_src_buf()) {
1449
 
                                        return EOF;
1450
 
                                }
1451
 
 
1452
1465
                        for (idx = 0 ; lexptr + idx < lexend ; idx++) {
1453
1466
                                tmp_state = cur_mbstate;
1454
1467
                                mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
1522
1535
 
1523
1536
/* pushback --- push a character back on the input */
1524
1537
 
1525
 
#ifdef MBS_SUPPORT
1526
 
 
1527
 
static void
 
1538
static inline void
1528
1539
pushback(void)
1529
1540
{
1530
 
        if (gawk_mb_cur_max > 1) {
 
1541
#ifdef MBS_SUPPORT
 
1542
        if (gawk_mb_cur_max > 1)
1531
1543
                cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
1532
1544
                        cur_ring_idx - 1;
1533
 
                (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
1534
 
        } else
1535
 
                (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
 
1545
#endif
 
1546
        (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
1536
1547
}
1537
1548
 
1538
 
#else
1539
 
 
1540
 
#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr)
1541
 
 
1542
 
#endif /* MBS_SUPPORT */
1543
1549
 
1544
1550
/* allow_newline --- allow newline after &&, ||, ? and : */
1545
1551
 
1630
1636
                tok = tokstart;
1631
1637
                for (;;) {
1632
1638
                        c = nextc();
1633
 
#ifdef MBS_SUPPORT
1634
 
                        if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
1635
 
#endif
1636
 
                        switch (c) {
 
1639
 
 
1640
                        if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
1637
1641
                        case '[':
1638
1642
                                /* one day check for `.' and `=' too */
1639
1643
                                if (nextc() == ':' || in_brack == 0)
1668
1672
end_regexp:
1669
1673
                                tokadd('\0');
1670
1674
                                yylval.sval = tokstart;
 
1675
                                if (do_lint) {
 
1676
                                        int peek = nextc();
 
1677
 
 
1678
                                        pushback();
 
1679
                                        if (peek == 'i' || peek == 's') {
 
1680
                                                if (source)
 
1681
                                                        lintwarn(
 
1682
                                                _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
 
1683
                                                                source, sourceline, peek);
 
1684
                                                else
 
1685
                                                        lintwarn(
 
1686
                                                _("tawk regex modifier `/.../%c' doesn't work in gawk"),
 
1687
                                                                peek);
 
1688
                                        }
 
1689
                                }
1671
1690
                                return lasttok = REGEXP;
1672
1691
                        case '\n':
1673
1692
                                pushback();
1681
1700
                }
1682
1701
        }
1683
1702
retry:
1684
 
        while ((c = nextc()) == ' ' || c == '\t')
 
1703
 
 
1704
        /* skipping \r is a hack, but Windows is just too pervasive. sigh. */
 
1705
        while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
1685
1706
                continue;
1686
1707
 
1687
1708
        lexeme = lexptr ? lexptr - 1 : lexptr;
1689
1710
        tok = tokstart;
1690
1711
        yylval.nodetypeval = Node_illegal;
1691
1712
 
1692
 
#ifdef MBS_SUPPORT
1693
 
        if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
1694
 
#endif
1695
 
        switch (c) {
 
1713
        if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
1696
1714
        case EOF:
1697
1715
                if (lasttok != NEWLINE) {
1698
1716
                        lasttok = NEWLINE;
1736
1754
                 */
1737
1755
                if (! do_traditional) {
1738
1756
                        /* strip trailing white-space and/or comment */
1739
 
                        while ((c = nextc()) == ' ' || c == '\t')
 
1757
                        while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
1740
1758
                                continue;
1741
1759
                        if (c == '#') {
1742
1760
                                if (do_lint)
1937
1955
                                yyerror(_("unterminated string"));
1938
1956
                                exit(1);
1939
1957
                        }
1940
 
#ifdef MBS_SUPPORT
1941
 
                        if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
1942
 
#endif
1943
 
                        if (c == '\\') {
 
1958
                        if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
 
1959
                            c == '\\') {
1944
1960
                                c = nextc();
1945
1961
                                if (c == '\n') {
1946
1962
                                        sourceline++;
2006
2022
                        case 'X':
2007
2023
                                if (do_traditional)
2008
2024
                                        goto done;
2009
 
                                if (tok == tokstart + 2)
2010
 
                                        inhex = TRUE;
 
2025
                                if (tok == tokstart + 2) {
 
2026
                                        int peek = nextc();
 
2027
 
 
2028
                                        if (ISXDIGIT(peek)) {
 
2029
                                                inhex = TRUE;
 
2030
                                                pushback();     /* following digit */
 
2031
                                        } else {
 
2032
                                                pushback();     /* x or X */
 
2033
                                                goto done;
 
2034
                                        }
 
2035
                                }
2011
2036
                                break;
2012
2037
                        case '.':
2013
 
                                if (seen_point) {
 
2038
                                /* period ends exponent part of floating point number */
 
2039
                                if (seen_point || seen_e) {
2014
2040
                                        gotnumber = TRUE;
2015
2041
                                        break;
2016
2042
                                }
2025
2051
                                        break;
2026
2052
                                }
2027
2053
                                seen_e = TRUE;
2028
 
                                if ((c = nextc()) == '-' || c == '+')
2029
 
                                        tokadd(c);
2030
 
                                else
2031
 
                                        pushback();
 
2054
                                if ((c = nextc()) == '-' || c == '+') {
 
2055
                                        int c2 = nextc();
 
2056
 
 
2057
                                        if (ISDIGIT(c2)) {
 
2058
                                                tokadd(c);
 
2059
                                                tokadd(c2);
 
2060
                                        } else {
 
2061
                                                pushback();     /* non-digit after + or - */
 
2062
                                                pushback();     /* + or - */
 
2063
                                                pushback();     /* e or E */
 
2064
                                        }
 
2065
                                } else if (! ISDIGIT(c)) {
 
2066
                                        pushback();     /* character after e or E */
 
2067
                                        pushback();     /* e or E */
 
2068
                                } else {
 
2069
                                        pushback();     /* digit */
 
2070
                                }
2032
2071
                                break;
2033
2072
                        case 'a':
2034
2073
                        case 'A':
2069
2108
                        eof_warned = TRUE;
2070
2109
                }
2071
2110
                tokadd('\0');
2072
 
                if (! do_traditional && isnondecimal(tokstart)) {
2073
 
                        static short warned = FALSE;
2074
 
                        if (do_lint && ! warned) {
2075
 
                                warned = TRUE;
2076
 
                                lintwarn("numeric constant `%.*s' treated as octal or hexadecimal",
2077
 
                                        strlen(tokstart)-1, tokstart);
 
2111
                if (! do_traditional && isnondecimal(tokstart, FALSE)) {
 
2112
                        if (do_lint) {
 
2113
                                if (ISDIGIT(tokstart[1]))       /* not an 'x' or 'X' */
 
2114
                                        lintwarn("numeric constant `%.*s' treated as octal",
 
2115
                                                (int) strlen(tokstart)-1, tokstart);
 
2116
                                else if (tokstart[1] == 'x' || tokstart[1] == 'X')
 
2117
                                        lintwarn("numeric constant `%.*s' treated as hexadecimal",
 
2118
                                                (int) strlen(tokstart)-1, tokstart);
2078
2119
                        }
2079
2120
                        yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart)));
2080
2121
                } else
2120
2161
         *
2121
2162
         * print "xyzzy"$_"foo"
2122
2163
         *
2123
 
         * Without the check for ` lasttok != '$'' ', this is parsed as
 
2164
         * Without the check for ` lasttok != '$' ', this is parsed as
2124
2165
         *
2125
2166
         * print "xxyzz" $(_"foo")
2126
2167
         *
3021
3062
        }
3022
3063
}
3023
3064
 
 
3065
/* deferred variables --- those that are only defined if needed. */
 
3066
 
 
3067
/*
 
3068
 * Is there any reason to use a hash table for deferred variables?  At the
 
3069
 * moment, there are only 1 to 3 such variables, so it may not be worth
 
3070
 * the overhead.  If more modules start using this facility, it should
 
3071
 * probably be converted into a hash table.
 
3072
 */
 
3073
 
 
3074
static struct deferred_variable {
 
3075
        NODE *(*load_func)(void);
 
3076
        struct deferred_variable *next;
 
3077
        char name[1];   /* variable-length array */
 
3078
} *deferred_variables;
 
3079
 
 
3080
/* register_deferred_variable --- add a var name and loading function to the list */
 
3081
 
 
3082
void
 
3083
register_deferred_variable(const char *name, NODE *(*load_func)(void))
 
3084
{
 
3085
        struct deferred_variable *dv;
 
3086
        size_t sl = strlen(name);
 
3087
 
 
3088
        emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl,
 
3089
                "register_deferred_variable");
 
3090
        dv->load_func = load_func;
 
3091
        dv->next = deferred_variables;
 
3092
        memcpy(dv->name, name, sl+1);
 
3093
        deferred_variables = dv;
 
3094
}
 
3095
 
3024
3096
/* variable --- make sure NAME is in the symbol table */
3025
3097
 
3026
3098
NODE *
3035
3107
 
3036
3108
        } else {
3037
3109
                /* not found */
3038
 
                if (! do_traditional && STREQ(name, "PROCINFO"))
3039
 
                        r = load_procinfo();
3040
 
                else if (STREQ(name, "ENVIRON"))
3041
 
                        r = load_environ();
3042
 
                else {
3043
 
                        /*
3044
 
                         * This is the only case in which we may not free the string.
3045
 
                         */
3046
 
                        NODE *n;
3047
 
 
3048
 
                        if (type == Node_var_array)
3049
 
                                n = node((NODE *) NULL, type, (NODE *) NULL);
3050
 
                        else
3051
 
                                n = node(Nnull_string, type, (NODE *) NULL);
3052
 
 
3053
 
                        return install(name, n);
 
3110
                struct deferred_variable *dv;
 
3111
 
 
3112
                for (dv = deferred_variables; TRUE; dv = dv->next) {
 
3113
                        if (dv == NULL) {
 
3114
                                /*
 
3115
                                 * This is the only case in which we may not
 
3116
                                 * free the string.
 
3117
                                 */
 
3118
                                NODE *n;
 
3119
 
 
3120
                                if (type == Node_var_array)
 
3121
                                        n = node(NULL, type, NULL);
 
3122
                                else
 
3123
                                        n = node(Nnull_string, type, NULL);
 
3124
 
 
3125
                                return install(name, n);
 
3126
                        }
 
3127
                        if (STREQ(name, dv->name)) {
 
3128
                                r = (*dv->load_func)();
 
3129
                                break;
 
3130
                        }
3054
3131
                }
3055
3132
        }
3056
3133
        if (can_free)