~ubuntu-branches/ubuntu/hardy/clamav/hardy-security

« back to all changes in this revision

Viewing changes to libclamav/jsparse/js-norm.c

  • Committer: Bazaar Package Importer
  • Author(s): Jamie Strandboge
  • Date: 2009-04-30 14:44:26 UTC
  • mfrom: (0.28.3 sid)
  • Revision ID: james.westby@ubuntu.com-20090430144426-933t29chbo6phaa7
Tags: 0.94.dfsg.2-1ubuntu0.3~hardy4
No change rebuild from backports for use with ClamAV 0.94

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 *  Javascript normalizer.
 
3
 *
 
4
 *  Copyright (C) 2008 Sourcefire, Inc.
 
5
 *
 
6
 *  Authors: Török Edvin
 
7
 *
 
8
 *  This program is free software; you can redistribute it and/or modify
 
9
 *  it under the terms of the GNU General Public License version 2 as
 
10
 *  published by the Free Software Foundation.
 
11
 *
 
12
 *  This program is distributed in the hope that it will be useful,
 
13
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
14
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
15
 *  GNU General Public License for more details.
 
16
 *
 
17
 *  You should have received a copy of the GNU General Public License
 
18
 *  along with this program; if not, write to the Free Software
 
19
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
20
 *  MA 02110-1301, USA.
 
21
 */
 
22
#ifdef HAVE_CONFIG_H
 
23
#include "clamav-config.h"
 
24
#endif
 
25
 
 
26
/* assert() only enabled with ./configure --enable-debug */
 
27
#ifndef CL_DEBUG
 
28
#define NDEBUG
 
29
#endif
 
30
 
 
31
#include <stdio.h>
 
32
 
 
33
#ifdef HAVE_UNISTD_H
 
34
#include <unistd.h>
 
35
#endif
 
36
#include <fcntl.h>
 
37
#include <stdlib.h>
 
38
#include <string.h>
 
39
#include <ctype.h>
 
40
#include <assert.h>
 
41
 
 
42
#include "cltypes.h"
 
43
#include "lexglobal.h"
 
44
#include "hashtab.h"
 
45
#include "others.h"
 
46
#include "str.h"
 
47
#include "js-norm.h"
 
48
#include "jsparse/generated/operators.h"
 
49
#include "jsparse/generated/keywords.h"
 
50
#include "jsparse/textbuf.h"
 
51
 
 
52
/* ----------- tokenizer ---------------- */
 
53
/* States of the JavaScript tokenizer (see struct scanner's state/last_state). */
enum tokenizer_state {
        Initial,
        MultilineComment,
        SinglelineComment,
        Number,
        DoubleQString,
        SingleQString,
        Identifier,
        Dummy
};
 
63
 
 
64
 
 
65
typedef struct scanner {
 
66
        struct text_buffer buf;
 
67
        const char *yytext;
 
68
        size_t yylen;
 
69
        const char *in;
 
70
        size_t insize;
 
71
        size_t pos;
 
72
        size_t lastpos;
 
73
        enum tokenizer_state state;
 
74
        enum tokenizer_state last_state;
 
75
} *yyscan_t;
 
76
 
 
77
typedef int YY_BUFFER_STATE;
 
78
 
 
79
static int yylex( YYSTYPE *lvalp, yyscan_t  );
 
80
static void yy_delete_buffer( YY_BUFFER_STATE, yyscan_t);
 
81
static YY_BUFFER_STATE yy_scan_bytes( const char *, size_t, yyscan_t scanner );
 
82
static const char *yyget_text ( yyscan_t scanner );
 
83
static int yyget_leng ( yyscan_t scanner );
 
84
static int yylex_init ( yyscan_t * ptr_yy_globals ) ;
 
85
static void yyset_debug (int debug_flag ,yyscan_t yyscanner );
 
86
static int yylex_destroy ( yyscan_t yyscanner ) ;
 
87
/* ----------- tokenizer end ---------------- */
 
88
 
 
89
/* Per-scope parser state (stored in struct scope's fsm_state). */
enum fsm_state {
        Base,
        InsideVar,
        InsideInitializer,
        WaitFunctionName,
        WaitParameterList,
        InsideFunctionDecl
};
 
97
 
 
98
struct scope {
 
99
        struct hashtable id_map;
 
100
        struct scope *parent;/* hierarchy */
 
101
        struct scope *nxt;/* all scopes kept in a list so we can easily free all of them */
 
102
        enum fsm_state fsm_state;
 
103
        int  last_token;
 
104
        unsigned int brackets;
 
105
        unsigned int blocks;
 
106
};
 
107
 
 
108
struct tokens {
 
109
        yystype *data;
 
110
        size_t   cnt;
 
111
        size_t   capacity;
 
112
};
 
113
 
 
114
/* state for the current JS file being parsed */
 
115
struct parser_state {
 
116
        unsigned long     var_uniq;
 
117
        unsigned long     syntax_errors;
 
118
        unsigned int      rec;
 
119
        struct scope *global;
 
120
        struct scope *current;
 
121
        struct scope *list;
 
122
        yyscan_t scanner;
 
123
        struct tokens tokens;
 
124
};
 
125
 
 
126
static struct scope* scope_new(struct parser_state *state)
 
127
{
 
128
        struct scope *parent = state->current;
 
129
        struct scope *s = cli_calloc(1, sizeof(*s));
 
130
        if(!s)
 
131
                return NULL;
 
132
        if(hashtab_init(&s->id_map, 10) < 0) {
 
133
                free(s);
 
134
                return NULL;
 
135
        }
 
136
        s->parent = parent;
 
137
        s->fsm_state = Base;
 
138
        s->nxt = state->list;
 
139
        state->list = s;
 
140
        state->current = s;
 
141
        return s;
 
142
}
 
143
 
 
144
static struct scope* scope_done(struct scope *s)
 
145
{
 
146
        struct scope* parent = s->parent;
 
147
        /* TODO: have a hashtab_destroy */
 
148
        hashtab_clear(&s->id_map);
 
149
        free(s->id_map.htable);
 
150
        free(s);
 
151
        return parent;
 
152
}
 
153
 
 
154
/* transitions:
 
155
 *   Base --(VAR)--> InsideVar
 
156
 *   InsideVar --(Identifier)-->InsideInitializer
 
157
 *   InsideVar --(anything_else) --> POP (to Base)
 
158
 *   InsideInitializer --(COMMA)--> POP (to InsideVar)
 
159
 *   InsideInitializer | InsideVar --(SEMICOLON) --> POP (to Base)
 
160
 *   InsideInitializer --(BRACKET_OPEN) --> WaitBrClose
 
161
 *   InsideInitializer --(PAR_OPEN) --> WaitParClose
 
162
 *   WaitBrClose --(BRACKET_OPEN) --> increase depth
 
163
 *   WaitBrClose --(BRACKET_CLOSE) --> POP
 
164
 *   WaitParClose --(PAR_CLOSE) --> POP
 
165
 *   WaitParClose --(PAR_OPEN) --> increase depth
 
166
 */
 
167
 
 
168
/* Base --(VAR)--> PUSH, to InsideVar
 
169
 * InsideVar --(Identifier)--> InsideInitializer
 
170
 * InsideVar --(ELSE)--> POP, inc. syntax_errors
 
171
 * InsideInitializer --(COMMA)--> POP (to InsideVar)
 
172
 * --(BRACKET_OPEN)--> inc bracket_counter
 
173
 * --(PAR_OPEN)--> inc par_counter
 
174
 * --(BRACKET_CLOSE) --> dec bracket_counter
 
175
 * --(PAR_CLOSE)--> dec par_counter
 
176
 * --(VAR)--> PUSH, to InsideVar (if bracket_counter != 0 || par_counter != 0)
 
177
 *        --> POP, to InsideVar, inc. syntax_errors (if bracket_counter == 0  && par_counter == 0)
 
178
 *  POP only allowed if bracket_counter == 0 && par_counter == 0 
 
179
 *
 
180
 * InsideInitializer acts differently, make it only a flag
 
181
 * ....................
 
182
 *
 
183
 * Pushing, Popping is done when entering / exiting function scopes,
 
184
 * tracking { and function ( is done by the function scope tracker too.
 
185
 *
 
186
 * we only need to track brackets.
 
187
 */
 
188
 
 
189
 
 
190
/*
 
191
 * var x = document;
 
192
 * x.writeln(...);
 
193
 *
 
194
 * ^we must not normalize member method names
 
195
 */
 
196
 
 
197
/*
 
198
 * Variables are declared at function scope, and their initial value is
 
199
 * undefined. At the point where the initializer is, and from there on the value
 
200
 * is defined.
 
201
 *
 
202
 * { doesn't introduce a new variable scope, they are in function's scope too
 
203
 *
 
204
 * function foo() {
 
205
 *  alert(x); -> x exists, undefined
 
206
 *  var x=5; 
 
207
 *  alert(x); -> x exists, =5
 
208
 * }
 
209
 * 
 
210
 * vs.
 
211
 *
 
212
 * function bar() {
 
213
 *   alert(x);//error, x not declared
 
214
 *   x=5;
 
215
 *   }
 
216
 *
 
217
 * vs.
 
218
 *
 
219
 * but we can declare variables without var, only valid if we use them after
 
220
 * assigning.
 
221
 *
 
222
 * function foobar() {
 
223
 *   x=5;
 
224
 *   alert(x);//x is defined, value is 5
 
225
 *   }
 
226
 *
 
227
 * other examples:
 
228
 * function foo2() {
 
229
 *   alert(x); -> x exists, undefined
 
230
 *   {
 
231
 *       var x=5; -> x equals to 5
 
232
 *   }
 
233
 *   alert(x); -> x is 5
 
234
 * }
 
235
 *
 
236
 * function foo3() {
 
237
 *   var x=4; -> x exists, equals to 4
 
238
 *   alert(x); -> x exists, equals to 4
 
239
 *   {
 
240
 *       var x=5; -> x equals to 5
 
241
 *   }
 
242
 *   alert(x); -> x is 5
 
243
 * }
 
244
 *
 
245
 * function bar3() {
 
246
 *   //same as foo3
 
247
 *   var x=4;
 
248
 *   alert(x);
 
249
 *   { 
 
250
 *        x=5;
 
251
 *   }
 
252
 *   alert(x);
 
253
 * }
 
254
 *
 
255
 */
 
256
 
 
257
 
 
258
static const char* scope_declare(struct scope *s, const char *token, const size_t len, struct parser_state *state)
 
259
{
 
260
        const struct element *el = hashtab_insert(&s->id_map, token, len, state->var_uniq++);
 
261
        /* hashtab_insert either finds an already existing entry, or allocates a
 
262
         * new one, we return the allocated string */
 
263
        return el ? el->key : NULL;
 
264
}
 
265
 
 
266
static const char* scope_use(struct scope *s, const char *token, const size_t len)
 
267
{
 
268
        const struct element *el = hashtab_find(&s->id_map, token, len);
 
269
        if(el) {
 
270
                /* identifier already found in current scope,
 
271
                 * return here to avoid overwriting uniq id */
 
272
                return el->key;
 
273
        }
 
274
        /* identifier not yet in current scope's hashtab, add with ID -1.
 
275
         * Later if we find a declaration it will automatically assign a uniq ID
 
276
         * to it. If not, we'll know that we have to push ID == -1 tokens to an
 
277
         * outer scope.*/
 
278
        el = hashtab_insert(&s->id_map, token, len, -1);
 
279
        return el ? el->key : NULL;
 
280
}
 
281
 
 
282
static long scope_lookup(struct scope *s, const char *token, const size_t len)
 
283
{
 
284
        while(s) {
 
285
                const struct element *el = hashtab_find(&s->id_map, token, len);
 
286
                if(el && el->data != -1) {
 
287
                        return el->data;
 
288
                }
 
289
                /* not found in current scope, try in outer scope */
 
290
                s = s->parent;
 
291
        }
 
292
        return -1;
 
293
}
 
294
 
 
295
static int tokens_ensure_capacity(struct tokens *tokens, size_t cap)
 
296
{
 
297
        if(tokens->capacity < cap) {
 
298
                cap += 1024;
 
299
                tokens->data = cli_realloc(tokens->data, cap * sizeof(*tokens->data));
 
300
                if(!tokens->data)
 
301
                        return CL_EMEM;
 
302
                tokens->capacity = cap;
 
303
        }
 
304
        return CL_SUCCESS;
 
305
}
 
306
 
 
307
static int add_token(struct parser_state *state, const yystype *token)
 
308
{
 
309
        if(tokens_ensure_capacity(&state->tokens, state->tokens.cnt + 1) < 0)
 
310
                return -1;
 
311
        state->tokens.data[state->tokens.cnt++] = *token;
 
312
        return 0;
 
313
}
 
314
 
 
315
/* Output buffer that is written to `outfd` each time it fills up. */
struct buf {
        size_t pos;                       /* number of bytes currently stored in buf[] */
        int outfd;                        /* file descriptor the buffered bytes are written to */
        char buf[65536];
};
 
320
 
 
321
static inline int buf_outc(char c, struct buf *buf)
 
322
{
 
323
        if(buf->pos >= sizeof(buf->buf)) {
 
324
                if(write(buf->outfd, buf->buf, sizeof(buf->buf)) != sizeof(buf->buf))
 
325
                        return CL_EIO;
 
326
                buf->pos = 0;
 
327
        }
 
328
        buf->buf[buf->pos++] = c;
 
329
        return CL_SUCCESS;
 
330
}
 
331
 
 
332
static inline int buf_outs(const char *s, struct buf *buf)
 
333
{
 
334
        const size_t buf_len = sizeof(buf->buf);
 
335
        size_t i;
 
336
 
 
337
        i = buf->pos;
 
338
        while(*s) {
 
339
                while(i < buf_len && *s) {
 
340
                        if(isspace(*s))
 
341
                                buf->buf[i++] = ' ';
 
342
                        else
 
343
                                buf->buf[i++] = tolower((unsigned char)(*s));
 
344
                        ++s;
 
345
                }
 
346
                if(i == buf_len) {
 
347
                        if(write(buf->outfd, buf->buf, buf_len) < 0)
 
348
                                return CL_EIO;
 
349
                       i = 0;
 
350
                }
 
351
        }
 
352
        buf->pos = i;
 
353
        return CL_SUCCESS;
 
354
}
 
355
 
 
356
/*
 * Emit a separating space when both the previously written character and
 * the first character about to be written are alphanumeric, so that two
 * adjacent tokens do not fuse into one.
 */
static inline void output_space(char last, char current, struct buf *out)
{
        /* cast: isalnum() is only defined for unsigned char values and EOF,
         * and `last` may come straight from (possibly 8-bit) token text */
        if(isalnum((unsigned char)last) && isalnum((unsigned char)current))
                buf_outc(' ', out);
}
 
361
 
 
362
 
 
363
/* return class of last character */
 
364
static char output_token(const yystype *token, struct scope *scope, struct buf *out, char lastchar)
 
365
{
 
366
        char sbuf[128];
 
367
        const char *s = TOKEN_GET(token, cstring);
 
368
        /* TODO: use a local buffer, instead of FILE* */
 
369
        switch(token->type) {
 
370
                case TOK_StringLiteral:
 
371
                        output_space(lastchar,'"', out);
 
372
                        buf_outc('"', out);
 
373
                        if(s) {
 
374
                                buf_outs(s, out);
 
375
                        }
 
376
                        buf_outc('"', out);
 
377
                        return '\"';
 
378
                case TOK_NumericInt:
 
379
                        output_space(lastchar,'0', out);
 
380
                        snprintf(sbuf, sizeof(sbuf), "%ld", TOKEN_GET(token, ival));
 
381
                        buf_outs(sbuf, out);
 
382
                        return '0';
 
383
                case TOK_NumericFloat:
 
384
                        output_space(lastchar,'0', out);
 
385
                        snprintf(sbuf, sizeof(sbuf), "%g", TOKEN_GET(token, dval));
 
386
                        buf_outs(sbuf, out);
 
387
                        return '0';
 
388
                case TOK_IDENTIFIER_NAME:
 
389
                        output_space(lastchar,'a', out);
 
390
                        if(s) {
 
391
                                long id = scope_lookup(scope, s, strlen(s));
 
392
                                if(id == -1) {
 
393
                                        /* identifier not normalized */
 
394
                                        buf_outs(s, out);
 
395
                                } else {
 
396
                                        snprintf(sbuf, sizeof(sbuf), "n%03ld",id);
 
397
                                        buf_outs(sbuf, out);
 
398
                                }
 
399
                        }
 
400
                        return 'a';
 
401
                case TOK_FUNCTION:
 
402
                        output_space(lastchar,'a', out);
 
403
                        buf_outs("function",out);
 
404
                        return 'a';
 
405
                default:
 
406
                        if(s) {
 
407
                                const size_t len = strlen(s);
 
408
                                output_space(lastchar,s[0], out);
 
409
                                buf_outs(s, out);
 
410
                                return len ? s[len-1] : '\0';
 
411
                        }
 
412
                        return '\0';
 
413
        }
 
414
}
 
415
 
 
416
/*
 
417
 * We can't delete the scope as soon as we see a }, because
 
418
 * we still need the hashmap from it.
 
419
 *
 
420
 * If we would normalize all the identifiers, and output when a scope is closed,
 
421
 * then it would be impossible to normalize calls to other functions.
 
422
 *
 
423
 * So we need to keep all scopes in memory, to do this instead of scope_done, we
 
424
 * simply just set current = current->parent when a scope is closed.
 
425
 * We keep a list of all scopes created in parser_state->list. When we have parsed
 
426
 * everything, we output everything, and then delete all scopes.
 
427
 *
 
428
 * We also need to know where to switch scopes on the second pass, so for
 
429
 * TOK_FUNCTION types we will use another pointer, that points to the scope
 
430
 * (added to yystype's union).
 
431
 *
 
432
 * We lookup the identifier in the scope (using scope_lookup, it looks in parent
 
433
 * scopes too), if ID is found then output (n%3d, Id),
 
434
 * otherwise output the identifier as is.
 
435
 *
 
436
 * To make  it easier to match sigs, we do a xfrm : 
 
437
 * 'function ID1 (..'. => 'n%3d = function (...'
 
438
 */
 
439
 
 
440
/*
 
441
 * we'll add all identifiers to the scope's map
 
442
 * those that are not decl. will have initial ID -1
 
443
 * if we later see a decl for it in same scope, it'll automatically get a
 
444
 * correct ID.
 
445
 *
 
446
 * When parsing of local scope is done, we take any ID -1 identifiers,
 
447
 * and push them up one level (careful not to overwrite existing IDs).
 
448
 *
 
449
 * it would be nice if the tokens would contain a link to the entry in the
 
450
 * hashtab, a link that automatically gets updated when the element is moved
 
451
 * (pushed up). This would prevent subsequent lookups in the map,
 
452
 * when we want to output the tokens.
 
453
 * There is no easy way to do that, so we just do another lookup
 
454
 *
 
455
 */
 
456
 
 
457
/*
 
458
 * This actually works, redefining foo:
 
459
 * function foo() {
 
460
 *   var foo=5; alert(foo);
 
461
 * }
 
462
 * So we can't treat function names just as any other identifier?
 
463
 * We can, because you can no longer call foo, if you redefined it as a var.
 
464
 * So if we rename both foo-s with same name, it will have same behaviour.
 
465
 *
 
466
 * This means that a new scope should begin after function, and not after
 
467
 * function ... (.
 
468
 */
 
469
 
 
470
static void scope_free_all(struct scope *p)
 
471
{
 
472
        struct scope *nxt;
 
473
        do {
 
474
                nxt = p->nxt;
 
475
                scope_done(p);
 
476
                p = nxt;
 
477
        } while(p);
 
478
}
 
479
 
 
480
/* Local prototype; decode_de() uses it to split the '|' separated word list in place. */
void cli_strtokenize(char *buffer, const char delim, const size_t token_count, const char **tokens);
 
481
static int match_parameters(const yystype *tokens, const char ** param_names, size_t count)
 
482
{
 
483
        size_t i,j=0;
 
484
        if(tokens[0].type != TOK_PAR_OPEN)
 
485
                return -1;
 
486
        i=1;
 
487
        while(count--) {
 
488
                const char *token_val = TOKEN_GET(&tokens[i], cstring);
 
489
                if(tokens[i].type != TOK_IDENTIFIER_NAME ||
 
490
                   !token_val ||
 
491
                   strcmp(token_val, param_names[j++]))
 
492
                        return -1;
 
493
                ++i;
 
494
                if((count && tokens[i].type != TOK_COMMA)
 
495
                   || (!count && tokens[i].type != TOK_PAR_CLOSE))
 
496
                        return -1;
 
497
                ++i;
 
498
        }
 
499
        return 0;
 
500
}
 
501
 
 
502
/*
 * Parameter-name signatures matched (via match_parameters) against function
 * declarations in run_decoders(): function(p,a,c,k,e,r) and
 * function(p,a,c,k,e,d).
 */
static const char *de_packer_3[] = {"p","a","c","k","e","r"};
static const char *de_packer_2[] = {"p","a","c","k","e","d"};
 
504
 
 
505
 
 
506
/* Larger of two values. Note: each argument may be evaluated twice. */
#ifndef MAX
#define MAX(a, b) ((a)>(b) ? (a) : (b))
#endif
 
509
 
 
510
/*
 * Finish the text collected in scanner->buf and hand it over as the current
 * token text: the allocation is trimmed to buf.pos bytes, yytext/yylen are
 * set and scanner->buf is reset to all zeroes.
 * yylen is buf.pos - 1 (presumably buf holds a trailing NUL -- TODO confirm);
 * an empty buffer yields yylen 0 instead of wrapping around to SIZE_MAX.
 * Returns the token text.
 */
static inline char *textbuffer_done(yyscan_t scanner)
{
        /* free unused memory */
        char *str = cli_realloc(scanner->buf.data, scanner->buf.pos);
        if(!str) {
                /* could not shrink: keep using the block we already have */
                str = scanner->buf.data;
        }
        scanner->yytext = str;
        scanner->yylen = scanner->buf.pos ? scanner->buf.pos - 1 : 0;
        memset(&scanner->buf, 0, sizeof(scanner->buf));
        return str;
}
 
522
 
 
523
/* Prefix prepended to this file's cli_dbgmsg() messages. */
#define MODULE "JS-Norm: "
 
524
 
 
525
/*
 * Release the heap string owned by a token, if it owns one
 * (vtype == vtype_string), and clear the pointer. Other tokens are left
 * untouched.
 */
static void free_token(yystype *token)
{
        if(token->vtype != vtype_string)
                return;

        free(token->val.string);
        token->val.string = NULL;
}
 
532
 
 
533
static int replace_token_range(struct tokens *dst, size_t start, size_t end, const struct tokens *with)
 
534
{
 
535
        const size_t len = with ? with->cnt : 0;
 
536
        size_t i;
 
537
        cli_dbgmsg(MODULE "Replacing tokens %lu - %lu with %lu tokens\n",start, end, len);
 
538
        if(start >= dst->cnt || end > dst->cnt)
 
539
                return -1;
 
540
        for(i=start;i<end;i++) {
 
541
                free_token(&dst->data[i]);
 
542
        }
 
543
        if(tokens_ensure_capacity(dst, dst->cnt - (end-start) + len) < 0)
 
544
                return CL_EMEM;
 
545
        memmove(&dst->data[start+len], &dst->data[end], (dst->cnt - end) * sizeof(dst->data[0]));
 
546
        if(with && len > 0) {
 
547
                memcpy(&dst->data[start], with->data, len * sizeof(dst->data[0]));
 
548
        }
 
549
        dst->cnt = dst->cnt - (end-start) + len;
 
550
        return CL_SUCCESS;
 
551
}
 
552
 
 
553
static int append_tokens(struct tokens *dst, const struct tokens *src)
 
554
{
 
555
        if(!dst || !src)
 
556
                return CL_ENULLARG;
 
557
        if(tokens_ensure_capacity(dst, dst->cnt + src->cnt) == -1)
 
558
                return CL_EMEM;
 
559
        cli_dbgmsg(MODULE "Appending %lu tokens\n", src->cnt);
 
560
        memcpy(&dst->data[dst->cnt], src->data, src->cnt * sizeof(dst->data[0]));
 
561
        dst->cnt += src->cnt;
 
562
        return CL_SUCCESS;
 
563
}
 
564
 
 
565
/*
 * Undo a p,a,c,k,e,r style packing.
 *   params[0] (p): packed text, string
 *   params[1] (a): radix used to encode word indexes, integer
 *   params[3] (k): '|' separated word list, string (split in place)
 * params[2], params[4] and params[5] are not used.
 * Every alphanumeric run in p is read as a base-a number (digits 0-9,
 * a-z = 10.., A-Z = 36..) and replaced by the word with that index; runs
 * with no (or an empty) word are copied unchanged. Non-alphanumeric
 * characters are copied, except that the backslash of \' and \" is dropped.
 * The result is written to *txtbuf, which is zeroed first; it stays empty
 * when p or k is missing or the word table cannot be allocated.
 */
static void decode_de(yystype *params[], struct text_buffer *txtbuf)
{
        const char *p = TOKEN_GET(params[0], cstring);
        const long a = TOKEN_GET(params[1], ival);
        char *k = TOKEN_GET(params[3], string);

        unsigned val=0;
        unsigned nsplit = 0;
        const char* o;
        const char **tokens;

        memset(txtbuf, 0, sizeof(*txtbuf));
        if(!p || !k )
                return;
        /* number of words = number of separators + 1 */
        for(o = k; *o; o++) if(*o == '|') nsplit++;
        nsplit++;
        tokens = malloc(sizeof(char*)*nsplit);
        if(!tokens) {
                return;
        }
        cli_strtokenize(k,'|',nsplit, tokens);

        do {
                /* casts: isalnum() is undefined for negative char values,
                 * and p is taken from the scanned script */
                while(*p && !isalnum((unsigned char)*p)) {
                        if(*p=='\\' && (p[1] == '\'' || p[1] == '\"'))
                                p++;
                        else
                                textbuffer_putc(txtbuf, *p++);
                }
                if(!*p) break;
                val = 0;
                o = p;
                while(*p && isalnum((unsigned char)*p)) {
                        unsigned x;
                        unsigned char v = *p++;
                        /* TODO: use a table here */
                        if(v >= 'a') x = 10+v-'a';
                        else if(v >= 'A') x = 36+v-'A';
                        else x = v-'0';
                        /* unsigned arithmetic: same value modulo 2^N as
                         * before, without signed long overflow for a huge
                         * radix or a long run of digits */
                        val = val*(unsigned)a+x;
                }
                if(val >= nsplit || !tokens[val] || !tokens[val][0])
                        while(o!=p)
                                textbuffer_putc(txtbuf, *o++);
                else    textbuffer_append(txtbuf, tokens[val]);
        } while (*p);
        free(tokens);
        /* NOTE(review): "\0" is an empty C string; if textbuffer_append()
         * measures it with strlen() this appends nothing -- confirm whether
         * a terminating NUL was intended */
        textbuffer_append(txtbuf, "\0");
}
 
616
 
 
617
struct decode_result {
 
618
        struct text_buffer txtbuf;
 
619
        size_t pos_begin;
 
620
        size_t pos_end;
 
621
        unsigned append:1; /* 0: tokens are replaced with new token(s),
 
622
                            1: old tokens are deleted, new ones appended at the end */
 
623
};
 
624
 
 
625
/*
 * Collect call arguments. tokens[*pos] must be the first token after the
 * opening parenthesis. For each of up to `want` arguments the argument's
 * first token is stored in params[] and the scan skips ahead to the next
 * TOK_COMMA (TOK_PAR_CLOSE for the last argument), never reading at or
 * beyond tokens[cnt]. *pos is advanced past everything consumed.
 * Returns the number of arguments stored.
 */
static size_t de_collect_params(yystype *tokens, size_t *pos, const size_t cnt, yystype *params[], const size_t want)
{
        size_t i = *pos, j;

        for(j = 0; j < want && i < cnt; j++) {
                const int last = (j == want - 1);

                params[j] = &tokens[i++];
                /* bounds check first, then look at the token */
                if(!last)
                        while (i < cnt && tokens[i].type != TOK_COMMA) i++;
                else
                        while (i < cnt && tokens[i].type != TOK_PAR_CLOSE) i++;
                i++;
        }
        *pos = i;
        return j;
}

/*
 * Handle a function declared with the packer signature (see de_packer_3 /
 * de_packer_2). `start` is the position of its parameter list, `cnt` the
 * number of tokens, `name` the function's name or NULL for an anonymous one.
 * The end of the declaration is located first. Then, for a named function,
 * each later call name(...) with six arguments is decoded; for an anonymous
 * function the first parenthesised argument list after the declaration is
 * decoded. The decoded text goes to res->txtbuf and the covered token range
 * to res->pos_begin / res->pos_end (extended over a trailing "{})" or ")").
 * `res` is left untouched when nothing suitable is found.
 *
 * NOTE(review): for a named function the search continues after a match, so
 * a second call overwrites res->txtbuf (decode_de zeroes it) -- confirm
 * whether the first buffer should be released or the loop should stop.
 */
static void handle_de(yystype *tokens, size_t start, const size_t cnt, const char *name, struct decode_result *res)
{
        size_t i, nesting = 1;
        yystype* parameters [6];
        const size_t parameters_cnt = 6;

        /* find function decl. end: TOK_FUNCTION tokens carrying a scope open
         * a function, those without one are the dummy end markers */
        for(i=start;i < cnt; i++) {
                if(tokens[i].type == TOK_FUNCTION) {
                        if(TOKEN_GET(&tokens[i], scope))
                                nesting++;
                        else
                                nesting--;
                        if(!nesting)
                                break;
                }
        }
        if(nesting)
                return;
        memset(parameters, 0, sizeof(parameters));
        if(name) {
                /* find call to function */
                for(;i+2 < cnt; i++) {
                        const char* token_val = TOKEN_GET(&tokens[i], cstring);
                        if(tokens[i].type == TOK_IDENTIFIER_NAME &&
                           token_val &&
                           !strcmp(name, token_val) &&
                           tokens[i+1].type == TOK_PAR_OPEN) {

                                i += 2;
                                if(de_collect_params(tokens, &i, cnt, parameters, parameters_cnt) == parameters_cnt)
                                        decode_de(parameters, &res->txtbuf);
                        }
                }
        } else {
                while(i<cnt && tokens[i].type != TOK_PAR_OPEN) i++;
                ++i;
                if(i >= cnt) return;
                if(de_collect_params(tokens, &i, cnt, parameters, parameters_cnt) == parameters_cnt)
                        decode_de(parameters, &res->txtbuf);
        }
        if(parameters[0] && parameters[parameters_cnt-1]) {
                res->pos_begin = parameters[0] - tokens;
                res->pos_end = parameters[parameters_cnt-1] - tokens + 1;
                /* only probe for "{})" when all three tokens exist */
                if(res->pos_end + 2 < cnt &&
                                tokens[res->pos_end].type == TOK_BRACKET_OPEN &&
                                tokens[res->pos_end+1].type == TOK_BRACKET_CLOSE &&
                                tokens[res->pos_end+2].type == TOK_PAR_CLOSE)
                        res->pos_end += 3; /* {}) */
                else
                        res->pos_end++; /* ) */
        }
}
 
694
 
 
695
static int handle_unescape(struct tokens *tokens, size_t start, const size_t cnt)
 
696
{
 
697
        if(tokens->data[start].type == TOK_StringLiteral) {
 
698
                char *R;
 
699
                struct tokens new_tokens;
 
700
                yystype tok;
 
701
 
 
702
                R = cli_unescape(TOKEN_GET(&tokens->data[start], cstring));
 
703
                tok.type = TOK_StringLiteral;
 
704
                TOKEN_SET(&tok, string, R);
 
705
                new_tokens.capacity = new_tokens.cnt = 1;
 
706
                new_tokens.data = &tok;
 
707
                if(replace_token_range(tokens, start-2, start+2, &new_tokens) < 0)
 
708
                        return CL_EMEM;
 
709
        }
 
710
        return CL_SUCCESS;
 
711
}
 
712
 
 
713
 
 
714
/* scriptasylum dot com's JS encoder */
 
715
static void handle_df(const yystype *tokens, size_t start, const size_t cnt, struct decode_result *res)
 
716
{
 
717
        char *str, *s1;
 
718
        size_t len, s1_len, i;
 
719
        unsigned char clast;
 
720
        char *R;
 
721
 
 
722
        if(tokens[start].type != TOK_StringLiteral)
 
723
                return;
 
724
        str = TOKEN_GET(&tokens[start], string);
 
725
        if(!str)
 
726
                return;
 
727
        len = strlen(str);
 
728
        clast = str[len-1] - '0';
 
729
 
 
730
        str[len-1] = '\0';
 
731
        s1 = cli_unescape(str);
 
732
        s1_len = strlen(s1);
 
733
        for(i=0;i<s1_len;i++) {
 
734
                s1[i] -= clast;
 
735
        }
 
736
        R = cli_unescape(s1);
 
737
        free(s1);
 
738
        res->pos_begin = start-2;
 
739
        res->pos_end = start+2;
 
740
        res->txtbuf.data = R;
 
741
        res->txtbuf.pos = strlen(R);
 
742
        res->append = 1;
 
743
}
 
744
 
 
745
 
 
746
 
 
747
static void handle_eval(struct tokens *tokens, size_t start, struct decode_result *res)
 
748
{
 
749
        res->txtbuf.data = TOKEN_GET(&tokens->data[start], string);
 
750
        if(res->txtbuf.data && tokens->data[start+1].type == TOK_PAR_CLOSE) {
 
751
                TOKEN_SET(&tokens->data[start], string, NULL);
 
752
                res->txtbuf.pos = strlen(res->txtbuf.data);
 
753
                res->pos_begin = start-2;
 
754
                res->pos_end = start+2;
 
755
        }
 
756
}
 
757
 
 
758
static void run_folders(struct tokens *tokens)
 
759
{
 
760
  size_t i;
 
761
 
 
762
  for(i = 0; i < tokens->cnt; i++) {
 
763
          const char *cstring = TOKEN_GET(&tokens->data[i], cstring);
 
764
          if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME &&
 
765
                    cstring &&
 
766
                    !strcmp("unescape", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) {
 
767
 
 
768
                  handle_unescape(tokens, i+2, tokens->cnt);
 
769
          }
 
770
  }
 
771
}
 
772
 
 
773
static inline int state_update_scope(struct parser_state *state, const yystype *token)
 
774
{
 
775
        if(token->type == TOK_FUNCTION) {
 
776
                struct scope *scope = TOKEN_GET(token, scope);
 
777
                if(scope) {
 
778
                        state->current = scope;
 
779
                }
 
780
                else {
 
781
                        /* dummy token marking function end */
 
782
                        if(state->current->parent)
 
783
                                state->current = state->current->parent;
 
784
                        /* don't output this token, it is just a dummy marker */
 
785
                        return 0;
 
786
                }
 
787
        }
 
788
        return 1;
 
789
}
 
790
 
 
791
static void run_decoders(struct parser_state *state)
 
792
{
 
793
  size_t i;
 
794
  const char* name;
 
795
  struct tokens *tokens = &state->tokens;
 
796
 
 
797
  for(i = 0; i < tokens->cnt; i++) {
 
798
          const char *cstring = TOKEN_GET(&tokens->data[i], cstring);
 
799
          struct decode_result res;
 
800
          res.pos_begin = res.pos_end = 0;
 
801
          res.append = 0;
 
802
          if(tokens->data[i].type == TOK_FUNCTION && i+13 < tokens->cnt) {
 
803
                  name = NULL;
 
804
                  ++i;
 
805
                  if(tokens->data[i].type == TOK_IDENTIFIER_NAME) {
 
806
                          cstring = TOKEN_GET(&tokens->data[i], cstring);
 
807
                          name = cstring;
 
808
                          ++i;
 
809
                  }
 
810
                  if(match_parameters(&tokens->data[i], de_packer_3, sizeof(de_packer_3)/sizeof(de_packer_3[0])) != -1
 
811
                     || match_parameters(&tokens->data[i], de_packer_2, sizeof(de_packer_2)/sizeof(de_packer_2[0])) != -1)  {
 
812
                          /* find function decl. end */
 
813
                          handle_de(tokens->data, i, tokens->cnt, name, &res);
 
814
                  }
 
815
          } else if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME &&
 
816
                    cstring &&
 
817
                    !strcmp("dF", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) {
 
818
                  /* TODO: also match signature of dF function (possibly
 
819
                   * declared using unescape */
 
820
 
 
821
                  handle_df(tokens->data, i+2, tokens->cnt, &res);
 
822
          } else if(i+2 < tokens->cnt && tokens->data[i].type == TOK_IDENTIFIER_NAME &&
 
823
                          cstring &&
 
824
                          !strcmp("eval", cstring) && tokens->data[i+1].type == TOK_PAR_OPEN) {
 
825
                  handle_eval(tokens, i+2, &res);
 
826
          }
 
827
        if(res.pos_end > res.pos_begin) {
 
828
                struct tokens parent_tokens;
 
829
                if(res.pos_end < tokens->cnt && tokens->data[res.pos_end].type == TOK_SEMICOLON)
 
830
                        res.pos_end++;
 
831
                parent_tokens = state->tokens;/* save current tokens */
 
832
                /* initialize embedded context */
 
833
                memset(&state->tokens, 0, sizeof(state->tokens));
 
834
                if(++state->rec > 16)
 
835
                        cli_dbgmsg(MODULE "recursion limit reached\n");
 
836
                else {
 
837
                        cli_js_process_buffer(state, res.txtbuf.data, res.txtbuf.pos);
 
838
                        --state->rec;
 
839
                }
 
840
                free(res.txtbuf.data);
 
841
                /* state->tokens still refers to the embedded/nested context
 
842
                 * here */
 
843
                if(!res.append) {
 
844
                        replace_token_range(&parent_tokens, res.pos_begin, res.pos_end, &state->tokens);
 
845
                } else {
 
846
                        /* delete tokens */
 
847
                        replace_token_range(&parent_tokens, res.pos_begin, res.pos_end, NULL);
 
848
                        append_tokens(&parent_tokens, &state->tokens);
 
849
                }
 
850
                /* end of embedded context, restore tokens state */
 
851
                free(state->tokens.data);
 
852
                state->tokens = parent_tokens;
 
853
        }
 
854
          state_update_scope(state, &state->tokens.data[i]);
 
855
  }
 
856
}
 
857
 
 
858
void cli_js_parse_done(struct parser_state* state)
 
859
{
 
860
        struct tokens * tokens = &state->tokens;
 
861
        size_t par_balance = 0, i;
 
862
        char end = '\0';
 
863
        YYSTYPE val;
 
864
 
 
865
        cli_dbgmsg(MODULE "in cli_js_parse_done()\n");
 
866
        /* close unfinished token */
 
867
        switch (state->scanner->state) {
 
868
                case DoubleQString:
 
869
                        end = '"';
 
870
                        break;
 
871
                case SingleQString:
 
872
                        end = '\'';
 
873
                        break;
 
874
        }
 
875
        if (end != '\0')
 
876
                cli_js_process_buffer(state, &end, 1);
 
877
        /* close remaining paranthesis */
 
878
        for (i=0;i<tokens->cnt;i++) {
 
879
                if (tokens->data[i].type == TOK_PAR_OPEN)
 
880
                        par_balance++;
 
881
                else if (tokens->data[i].type == TOK_PAR_CLOSE && par_balance > 0)
 
882
                        par_balance--;
 
883
        }
 
884
        if (par_balance > 0) {
 
885
                memset(&val, 0, sizeof(val));
 
886
                val.type = TOK_PAR_CLOSE;
 
887
                TOKEN_SET(&val, cstring, ")");
 
888
                while (par_balance-- > 0) {
 
889
                        add_token(state, &val);
 
890
                }
 
891
        }
 
892
 
 
893
        /* we had to close unfinished strings, paranthesis,
 
894
         * so that the folders/decoders can run properly */
 
895
        run_folders(&state->tokens);
 
896
        run_decoders(state);
 
897
 
 
898
        yylex_destroy(state->scanner);
 
899
        state->scanner = NULL;
 
900
}
 
901
 
 
902
 
 
903
void cli_js_output(struct parser_state *state, const char *tempdir)
 
904
{
 
905
        unsigned i;
 
906
        struct buf buf;
 
907
        char lastchar = '\0';
 
908
        char filename[1024];
 
909
 
 
910
        snprintf(filename, 1024, "%s/javascript", tempdir);
 
911
 
 
912
        buf.pos = 0;
 
913
        buf.outfd = open(filename, O_CREAT | O_WRONLY, 0600);
 
914
        if(buf.outfd < 0) {
 
915
                cli_errmsg(MODULE "cannot open output file for writing: %s\n", filename);
 
916
                return;
 
917
        }
 
918
        /* append to file */
 
919
        if(lseek(buf.outfd, 0, SEEK_END) != 0) {
 
920
                /* separate multiple scripts with \n */
 
921
                buf_outc('\n', &buf);
 
922
        }
 
923
        buf_outs("<script>", &buf);
 
924
        state->current = state->global;
 
925
        for(i = 0; i < state->tokens.cnt; i++) {
 
926
                if(state_update_scope(state, &state->tokens.data[i]))
 
927
                        lastchar = output_token(&state->tokens.data[i], state->current, &buf, lastchar);
 
928
        }
 
929
        /* add /script if not already there */
 
930
        if(buf.pos < 9 || memcmp(buf.buf + buf.pos - 9, "</script>", 9))
 
931
                buf_outs("</script>", &buf);
 
932
        if(write(buf.outfd, buf.buf, buf.pos) < 0) {
 
933
                cli_dbgmsg(MODULE "I/O error\n");
 
934
        }
 
935
        close(buf.outfd);
 
936
        cli_dbgmsg(MODULE "dumped/appended normalized script to: %s\n",filename);
 
937
}
 
938
 
 
939
void cli_js_destroy(struct parser_state *state)
 
940
{
 
941
        size_t i;
 
942
        if(!state)
 
943
                return;
 
944
        scope_free_all(state->list);
 
945
        for(i=0;i<state->tokens.cnt;i++) {
 
946
                free_token(&state->tokens.data[i]);
 
947
        }
 
948
        free(state->tokens.data);
 
949
        /* detect use after free */
 
950
        if(state->scanner)
 
951
                yylex_destroy(state->scanner);
 
952
        memset(state, 0x55, sizeof(*state));
 
953
        free(state);
 
954
        cli_dbgmsg(MODULE "cli_js_destroy() done\n");
 
955
}
 
956
 
 
957
/* buffer is html-normlike "chunk", if original file is bigger than buffer,
 
958
 * we rewind to a space, so we'll know that tokens won't be broken in half at
 
959
 * the end of a buffer. All tokens except string-literals of course.
 
960
 * So we can assume that after the buffer there is either a space, EOF, or a
 
961
 * chunk of text not containing whitespace at all (for which we care only if its
 
962
 * a stringliteral)*/
 
963
void cli_js_process_buffer(struct parser_state *state, const char *buf, size_t n)
 
964
{
 
965
        struct scope* current = state->current;
 
966
        YYSTYPE val;
 
967
        int yv;
 
968
        YY_BUFFER_STATE yyb;
 
969
 
 
970
        if(!state->global) {
 
971
                /* this state has either not been initialized,
 
972
                 * or cli_js_parse_done() was already called on it */
 
973
                cli_warnmsg(MODULE "invalid state\n");
 
974
                return;
 
975
        }
 
976
        yyb = yy_scan_bytes(buf, n, state->scanner);
 
977
        memset(&val, 0, sizeof(val));
 
978
        val.vtype = vtype_undefined;
 
979
        /* on EOF yylex will return 0 */
 
980
        while( (yv=yylex(&val, state->scanner)) != 0)
 
981
        {
 
982
                const char *text;
 
983
                size_t leng;
 
984
 
 
985
                val.type = yv;
 
986
                switch(yv) {
 
987
                        case TOK_VAR:
 
988
                                current->fsm_state = InsideVar;
 
989
                                break;
 
990
                        case TOK_IDENTIFIER_NAME:
 
991
                                text = yyget_text(state->scanner);
 
992
                                leng = yyget_leng(state->scanner);
 
993
                                if(current->last_token == TOK_DOT) {
 
994
                                        /* this is a member name, don't normalize
 
995
                                        */
 
996
                                        TOKEN_SET(&val, string, cli_strdup(text));
 
997
                                        val.type = TOK_UNNORM_IDENTIFIER;
 
998
                                } else {
 
999
                                        switch(current->fsm_state) {
 
1000
                                                case WaitParameterList:
 
1001
                                                        state->syntax_errors++;
 
1002
                                                        /* fall through */
 
1003
                                                case Base:
 
1004
                                                case InsideInitializer:
 
1005
                                                        TOKEN_SET(&val, cstring, scope_use(current, text, leng));
 
1006
                                                        break;
 
1007
                                                case InsideVar:
 
1008
                                                case InsideFunctionDecl:
 
1009
                                                        TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
 
1010
                                                        current->fsm_state = InsideInitializer;
 
1011
                                                        current->brackets = 0;
 
1012
                                                        break;
 
1013
                                                case WaitFunctionName:
 
1014
                                                        TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
 
1015
                                                        current->fsm_state = WaitParameterList;
 
1016
                                                        break;
 
1017
                                        }
 
1018
                                }
 
1019
                                break;
 
1020
                        case TOK_PAR_OPEN:
 
1021
                                switch(current->fsm_state) {
 
1022
                                        case WaitFunctionName:
 
1023
                                                /* fallthrough */
 
1024
                                        case WaitParameterList:
 
1025
                                                current->fsm_state = InsideFunctionDecl;
 
1026
                                                break;
 
1027
                                        default:
 
1028
                                                /* noop */
 
1029
                                                break;
 
1030
                                }
 
1031
                                break;
 
1032
                        case TOK_PAR_CLOSE:
 
1033
                                switch(current->fsm_state) {
 
1034
                                        case WaitFunctionName:
 
1035
                                                state->syntax_errors++;
 
1036
                                                break;
 
1037
                                        case WaitParameterList:
 
1038
                                                current->fsm_state = Base;
 
1039
                                                break;
 
1040
                                        default:
 
1041
                                                /* noop */
 
1042
                                                break;
 
1043
                                }
 
1044
                                break;
 
1045
                        case TOK_CURLY_BRACE_OPEN:
 
1046
                                switch(current->fsm_state) {
 
1047
                                        case WaitFunctionName:
 
1048
                                                /* fallthrough */
 
1049
                                        case WaitParameterList:
 
1050
                                        case InsideFunctionDecl:
 
1051
                                                /* in a syntactically correct
 
1052
                                                 * file, we would already be in
 
1053
                                                 * the Base state when we see a {
 
1054
                                                 */
 
1055
                                                current->fsm_state = Base;
 
1056
                                                /* fall-through */
 
1057
                                        case InsideVar:
 
1058
                                        case InsideInitializer:
 
1059
                                                state->syntax_errors++;
 
1060
                                                /* fall-through */
 
1061
                                        case Base:
 
1062
                                        default:
 
1063
                                                current->blocks++;
 
1064
                                                break;
 
1065
                                }
 
1066
                                break;
 
1067
                                        case TOK_CURLY_BRACE_CLOSE:
 
1068
                                if(current->blocks > 0)
 
1069
                                        current->blocks--;
 
1070
                                else
 
1071
                                        state->syntax_errors++;
 
1072
                                if(!current->blocks) {
 
1073
                                        if(current->parent) {
 
1074
                                                /* add dummy FUNCTION token to
 
1075
                                                 * mark function end */
 
1076
                                                TOKEN_SET(&val, cstring, "}");
 
1077
                                                add_token(state, &val);
 
1078
                                                TOKEN_SET(&val, scope, NULL);
 
1079
                                                val.type = TOK_FUNCTION;
 
1080
 
 
1081
                                                state->current = current = current->parent;
 
1082
                                        } else{
 
1083
                                                /* extra } */
 
1084
                                                state->syntax_errors++;
 
1085
                                }
 
1086
                                }
 
1087
                                break;
 
1088
                        case TOK_BRACKET_OPEN:
 
1089
                                current->brackets++;
 
1090
                                break;
 
1091
                        case TOK_BRACKET_CLOSE:
 
1092
                                if(current->brackets > 0)
 
1093
                                        current->brackets--;
 
1094
                                else
 
1095
                                        state->syntax_errors++;
 
1096
                                break;
 
1097
                        case TOK_COMMA:
 
1098
                                if (current->fsm_state == InsideInitializer && current->brackets == 0 && current->blocks == 0) {
 
1099
                                        /* initializer ended only if we
 
1100
                                         * encountered a comma, and [] are
 
1101
                                         * balanced.
 
1102
                                         * This avoids switching state on:
 
1103
                                         * var x = [4,y,u];*/
 
1104
                                        current->fsm_state = InsideVar;
 
1105
                                }
 
1106
                                break;
 
1107
                        case TOK_SEMICOLON:
 
1108
                                if (current->brackets == 0 && current->blocks == 0) {
 
1109
                                        /* avoid switching state on unbalanced []:
 
1110
                                         * var x = [test;testi]; */
 
1111
                                        current->fsm_state = Base;
 
1112
                                }
 
1113
                                break;
 
1114
                        case TOK_FUNCTION:
 
1115
                                current = scope_new(state);
 
1116
                                current->fsm_state = WaitFunctionName;
 
1117
                                TOKEN_SET(&val, scope, state->current);
 
1118
                                break;
 
1119
                        case TOK_StringLiteral:
 
1120
                                if(state->tokens.cnt > 0 && state->tokens.data[state->tokens.cnt-1].type == TOK_PLUS) {
 
1121
                                        /* see if can fold */
 
1122
                                        yystype *prev_string = &state->tokens.data[state->tokens.cnt-2];
 
1123
                                        if(prev_string->type == TOK_StringLiteral) {
 
1124
                                                char *str = TOKEN_GET(prev_string, string);
 
1125
                                                size_t str_len = strlen(str);
 
1126
 
 
1127
                                                text = yyget_text(state->scanner);
 
1128
                                                leng = yyget_leng(state->scanner);
 
1129
 
 
1130
 
 
1131
                                                /* delete TOK_PLUS */
 
1132
                                                free_token(&state->tokens.data[--state->tokens.cnt]);
 
1133
 
 
1134
                                                str = cli_realloc(str, str_len + leng + 1);
 
1135
                                                strncpy(str+str_len, text, leng);
 
1136
                                                str[str_len + leng] = '\0';
 
1137
                                                TOKEN_SET(prev_string, string, str);
 
1138
                                                free(val.val.string);
 
1139
                                                memset(&val, 0, sizeof(val));
 
1140
                                                val.vtype = vtype_undefined;
 
1141
                                                continue;
 
1142
                                        }
 
1143
                                }
 
1144
                                break;
 
1145
                }
 
1146
                if(val.vtype == vtype_undefined) {
 
1147
                        text = yyget_text(state->scanner);
 
1148
                        TOKEN_SET(&val, string, cli_strdup(text));
 
1149
                        abort();
 
1150
                }
 
1151
                add_token(state, &val);
 
1152
                current->last_token = yv;
 
1153
                memset(&val, 0, sizeof(val));
 
1154
                val.vtype = vtype_undefined;
 
1155
        }
 
1156
        yy_delete_buffer(yyb, state->scanner);
 
1157
}
 
1158
 
 
1159
struct parser_state *cli_js_init(void)
 
1160
{
 
1161
        struct parser_state *state = cli_calloc(1, sizeof(*state));
 
1162
        if(!state)
 
1163
                return NULL;
 
1164
        if(!scope_new(state)) {
 
1165
                free(state);
 
1166
                return NULL;
 
1167
        }
 
1168
        state->global = state->current;
 
1169
 
 
1170
        if(yylex_init(&state->scanner)) {
 
1171
                scope_done(state->global);
 
1172
                free(state);
 
1173
                return NULL;
 
1174
        }
 
1175
        yyset_debug(1, state->scanner);
 
1176
        cli_dbgmsg(MODULE "cli_js_init() done\n");
 
1177
        return state;
 
1178
}
 
1179
 
 
1180
/*-------------- tokenizer ---------------------*/
 
1181
enum char_class {
 
1182
        Whitespace,
 
1183
        Slash,
 
1184
        Operator,
 
1185
        DQuote,
 
1186
        SQuote,
 
1187
        Digit,
 
1188
        IdStart,
 
1189
        BracketOpen = TOK_BRACKET_OPEN,
 
1190
        BracketClose = TOK_BRACKET_CLOSE,
 
1191
        Comma = TOK_COMMA,
 
1192
        CurlyOpen = TOK_CURLY_BRACE_OPEN,
 
1193
        CurlyClose = TOK_CURLY_BRACE_CLOSE,
 
1194
        ParOpen = TOK_PAR_OPEN,
 
1195
        ParClose = TOK_PAR_CLOSE,
 
1196
        Dot = TOK_DOT,
 
1197
        SemiColon = TOK_SEMICOLON,
 
1198
        Nop
 
1199
};
 
1200
 
 
1201
/* Two-letter aliases for enum char_class, so that each row of the 256-entry
 * lookup tables below fits on one line. */
#define WS Whitespace
#define SL Slash
#define OP Operator
#define DQ DQuote
#define SQ SQuote
#define DG Digit
#define ID IdStart
#define BO BracketOpen
#define BC BracketClose
#define CM Comma
#define CO CurlyOpen
#define CC CurlyClose
#define PO ParOpen
#define PC ParClose
#define DT Dot
#define SC SemiColon
#define NA Nop
 
1218
 
 
1219
static const enum char_class ctype[256] = {
 
1220
        NA, NA, NA, NA, NA, NA, NA, NA, NA, WS, WS, WS, NA, WS, NA, NA,
 
1221
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1222
        WS, OP, DQ, NA, ID, OP, OP, SQ, PO, PC, OP, OP, CM, OP, DT, SL,
 
1223
        DG, DG, DG, DG, DG, DG, DG, DG, DG, DG, OP, SC, OP, OP, OP, OP,
 
1224
        NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
 
1225
        ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, BO, ID, BC, OP, ID,
 
1226
        NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
 
1227
        ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, CO, OP, CC, OP, NA,
 
1228
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1229
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1230
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1231
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1232
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1233
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1234
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1235
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
 
1236
};
 
1237
 
 
1238
static const enum char_class id_ctype[256] = {
 
1239
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1240
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1241
        NA, NA, NA, NA, ID, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1242
        ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, NA, NA, NA, NA, NA,
 
1243
        NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
 
1244
        ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, OP, NA, NA, ID,
 
1245
        NA, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID,
 
1246
        ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, ID, NA, NA, NA, NA, NA,
 
1247
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1248
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1249
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1250
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1251
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1252
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1253
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1254
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 
1255
};
 
1256
 
 
1257
/* Expands to a complete `case` arm: stores the constant string S as the
 * token's cstring value and returns cClass. Relies on `lvalp` and `cClass`
 * being in scope where the macro is expanded. */
#define CASE_SPECIAL_CHAR(C, S) case C: TOKEN_SET(lvalp, cstring, (S)); return cClass;
 
1258
 
 
1259
/* Size that textbuf_clean() shrinks an oversized text buffer back to. */
#define BUF_KEEP_SIZE 32768
 
1260
 
 
1261
static void textbuf_clean(struct text_buffer *buf)
 
1262
{
 
1263
        if(buf->capacity > BUF_KEEP_SIZE) {
 
1264
                buf->data = cli_realloc(buf->data, BUF_KEEP_SIZE);
 
1265
                buf->capacity = BUF_KEEP_SIZE;
 
1266
        }
 
1267
        buf->pos = 0;
 
1268
}
 
1269
 
 
1270
/* Scan the body of a string literal delimited by the quote character `q`,
 * starting at the scanner's current position.
 *
 * The text up to the closing quote (or up to the end of input if there is
 * none) is appended, normalized, to the scanner's text buffer. A quote
 * directly preceded by a backslash does not end the string.
 *
 * Returns TOK_StringLiteral when the closing quote was found; otherwise
 * returns 0 and leaves the scanner in `tostate`, so the string can be
 * continued with more input. */
static inline int parseString(YYSTYPE *lvalp, yyscan_t scanner, const char q,
		enum tokenizer_state tostate)
{
	const char *start = &scanner->in[scanner->pos];
	const char *end = start;
	size_t len;

	/* look for the quote terminating the string */
	for(;;) {
		const size_t remaining = &scanner->in[scanner->insize] - end;

		end = memchr(end, q, remaining);
		if(!end || end <= start || end[-1] != '\\')
			break;
		/* escaped quote: keep searching after it */
		++end;
	}

	if(end != NULL && end >= start)
		len = end - start;
	else
		len = scanner->insize - scanner->pos;
	cli_textbuffer_append_normalize(&scanner->buf, start, len);

	if(!end) {
		/* unfinished string */
		scanner->pos += len;
		scanner->state = tostate;
		return 0;
	}

	/* skip over end quote */
	scanner->pos += len + 1;
	textbuffer_putc(&scanner->buf, '\0');
	TOKEN_SET(lvalp, string, textbuffer_done(scanner));
	scanner->state = Initial;
	assert(lvalp->val.string);
	return TOK_StringLiteral;
}
 
1305
 
 
1306
/* Scan a double-quoted string literal; see parseString(). */
static inline int parseDQString(YYSTYPE *lvalp, yyscan_t scanner)
{
	const char quote = '"';

	return parseString(lvalp, scanner, quote, DoubleQString);
}
 
1310
 
 
1311
/* Scan a single-quoted string literal; see parseString(). */
static inline int parseSQString(YYSTYPE *lvalp, yyscan_t scanner)
{
	const char quote = '\'';

	return parseString(lvalp, scanner, quote, SingleQString);
}
 
1315
 
 
1316
/* Scan a numeric literal from the scanner's current position, collecting its
 * characters in the scanner's text buffer.
 *
 * Digits are always accepted, one '.' is accepted, and after the '.' an
 * 'e'/'E' optionally followed by a sign or digit is accepted too.
 *
 * Returns TOK_NumericInt or TOK_NumericFloat once a character that does not
 * belong to the literal is seen (that character is left unread). If the
 * input ends first, returns 0 and leaves the scanner in the Number state. */
static inline int parseNumber(YYSTYPE *lvalp, yyscan_t scanner)
{
	const unsigned char *input = (const unsigned char*)scanner->in;
	int seen_dot = 0;

	for(; scanner->pos < scanner->insize; ) {
		unsigned char ch = input[scanner->pos++];

		if(isdigit(ch)) {
			textbuffer_putc(&scanner->buf, ch);
			continue;
		}
		if(!seen_dot && ch == '.') {
			seen_dot = 1;
			textbuffer_putc(&scanner->buf, '.');
			continue;
		}
		if(seen_dot && (ch == 'e' || ch == 'E')) {
			textbuffer_putc(&scanner->buf, ch);
			if(scanner->pos < scanner->insize) {
				ch = input[scanner->pos++];
				if(ch == '+' || ch == '-' || isdigit(ch)) {
					textbuffer_putc(&scanner->buf, ch);
					continue;
				}
			}
		}

		/* end of the literal: un-read the last character and emit */
		scanner->pos--;
		textbuffer_putc(&scanner->buf, '\0');
		scanner->state = Initial;
		if(!seen_dot) {
			TOKEN_SET(lvalp, ival, atoi(scanner->buf.data));
			return TOK_NumericInt;
		}
		TOKEN_SET(lvalp, dval, atof(scanner->buf.data));
		return TOK_NumericFloat;
	}

	/* ran out of input in the middle of a number */
	scanner->state = Number;
	return 0;
}
 
1355
 
 
1356
/* Scan an identifier or keyword from the scanner's current position,
 * collecting its characters in the scanner's text buffer.
 *
 * Returns the keyword's token value if the collected text is a keyword,
 * TOK_IDENTIFIER_NAME (with a NULL cstring value) otherwise. If the input
 * ends before the identifier does, returns 0 and leaves the scanner in the
 * Identifier state. */
static inline int parseId(YYSTYPE *lvalp, yyscan_t scanner)
{
	const unsigned char *input = (const unsigned char*)scanner->in;
	const struct keyword *keyword;

	scanner->state = Initial;
	for(; scanner->pos < scanner->insize; ) {
		const unsigned char ch = input[scanner->pos++];

		switch(id_ctype[ch]) {
			case IdStart:
				textbuffer_putc(&scanner->buf, ch);
				break;
			case Operator:
				/* the table contains OP only for \ */
				assert(ch == '\\');
				/* note: the character after the backslash is consumed
				 * by this test whether or not it is a 'u' */
				if(scanner->pos < scanner->insize &&
						input[scanner->pos++] == 'u') {
					textbuffer_putc(&scanner->buf, ch);
					break;
				}
				if(scanner->pos == scanner->insize) {
					scanner->pos++;
				}
				/* else fallthrough */
			default:
				/* character is no longer part of identifier */
				scanner->state = Initial;
				textbuffer_putc(&scanner->buf, '\0');
				scanner->pos--;
				keyword = in_word_set(scanner->buf.data, scanner->buf.pos-1);
				if(!keyword) {
					/* it is not a keyword, just an identifier */
					TOKEN_SET(lvalp, cstring, NULL);
					return TOK_IDENTIFIER_NAME;
				}
				/* we got a keyword */
				TOKEN_SET(lvalp, cstring, keyword->name);
				return keyword->val;
		}
	}

	/* input ended while still inside the identifier */
	scanner->state = Identifier;
	return 0;
}
 
1399
 
 
1400
/* Smaller of two values; each argument may be evaluated twice. */
#ifndef MIN
#define MIN(x,y) ((x)<(y) ? (x):(y))
#endif
 
1403
 
 
1404
/*
 * Match an operator at the current input position.
 *
 * Candidates are tried longest first: the window starts at up to 5 bytes
 * (bounded by the remaining input) and shrinks one byte at a time until
 * in_op_set() reports a match.  On a match the token text is set to the
 * operator's name, the input position advances by the matched length and
 * the operator's token value is returned.
 *
 * If nothing matches (treated as unreachable and asserted in debug
 * builds), one character is skipped and TOK_ERROR is returned.
 */
static int parseOperator(YYSTYPE *lvalp, yyscan_t scanner)
{
        size_t len = MIN(5, scanner->insize - scanner->pos);

        for(; len > 0; len--) {
                const struct operator *op = in_op_set(&scanner->in[scanner->pos], len);

                if(!op)
                        continue;

                TOKEN_SET(lvalp, cstring, op->name);
                scanner->pos += len;
                return op->val;
        }

        /* not expected to happen: the caller only dispatches here for
         * operator characters */
        assert(0);
        scanner->pos++;
        TOKEN_SET(lvalp, cstring, NULL);
        return TOK_ERROR;
}
 
1422
 
 
1423
/*
 * Allocate a scanner through cli_calloc() and store it in *scanner.
 * Returns 0 on success, -1 when the allocation failed (in which case
 * *scanner is NULL).
 */
static int yylex_init(yyscan_t *scanner)
{
        *scanner = cli_calloc(1, sizeof(**scanner));
        if(*scanner == NULL)
                return -1;
        return 0;
}
 
1428
 
 
1429
/*
 * Release a scanner: first the text buffer it holds, then the scanner
 * object itself.  The scanner must not be NULL.  Always returns 0.
 */
static int yylex_destroy(yyscan_t scanner)
{
        /* the buffer has to go first, it is reached through the scanner */
        free(scanner->buf.data);

        free(scanner);

        return 0;
}
 
1435
 
 
1436
/*
 * Attach an input buffer of len bytes starting at p to the scanner and
 * rewind it to the beginning.  The bytes are not copied; only the
 * pointer is stored.  Always returns 0.
 */
static int yy_scan_bytes(const char *p, size_t len, yyscan_t scanner)
{
        /* new input */
        scanner->in = p;
        scanner->insize = len;

        /* start from the first byte */
        scanner->pos = 0;

        /* reset the (position, state) pair yylex() remembers between
         * calls for its infinite-loop check */
        scanner->lastpos = -1;
        scanner->last_state = Dummy;

        return 0;
}
 
1445
 
 
1446
/* Intentionally a no-op: both arguments are ignored. */
static void yyset_debug (int debug_flag ,yyscan_t yyscanner )
{
        (void)debug_flag;
        (void)yyscanner;
}
 
1449
 
 
1450
/* Intentionally a no-op: both arguments are ignored. */
static void yy_delete_buffer( YY_BUFFER_STATE yyb, yyscan_t scanner)
{
        (void)yyb;
        (void)scanner;
}
 
1453
 
 
1454
/*
 * Text of the current token: scanner->yytext when it is set, otherwise
 * the contents of the scanner's text buffer.  At least one of the two
 * must be non-NULL (asserted in debug builds).
 */
static const char *yyget_text(yyscan_t scanner)
{
        assert(scanner->buf.data || scanner->yytext);

        if(scanner->yytext)
                return scanner->yytext;

        return scanner->buf.data;
}
 
1459
 
 
1460
/*
 * Length of the current token: scanner->yylen when it is non-zero,
 * otherwise the number of bytes in the text buffer minus one (the
 * buffer also holds a terminating \0), or 0 for an empty buffer.
 */
static int yyget_leng(yyscan_t scanner)
{
        if(scanner->yylen)
                return scanner->yylen;

        if(scanner->buf.pos > 0)
                return scanner->buf.pos - 1;    /* don't count the \0 */

        return 0;
}
 
1465
 
 
1466
/*
 * Lexer entry point: scan forward from scanner->pos and return the next
 * token.
 *
 * The scanner keeps its state between calls, so a token that was cut off
 * by the end of the input is resumed by the matching parse* helper on the
 * next call.  Whitespace, Nop characters and comments are consumed
 * without producing a token.
 *
 * Returns the value produced by the parse* helper or by
 * CASE_SPECIAL_CHAR for the token that was recognised, or 0 when the
 * input ran out before a token was completed.
 */
static int yylex(YYSTYPE *lvalp, yyscan_t  scanner)
{
        const size_t insize = scanner->insize;
        const unsigned char *in = (const unsigned char*)scanner->in;
        unsigned char next;
        /* NOTE(review): name kept as-is; CASE_SPECIAL_CHAR is defined
         * outside this view and may refer to locals by name */
        enum char_class cClass;

        scanner->yytext = NULL;
        scanner->yylen = 0;

        /* Same position and same state as on the previous call means no
         * progress was made: skip one character to break the loop.  A
         * state change at the same position is not necessarily a loop. */
        if(scanner->pos == scanner->lastpos && scanner->last_state == scanner->state) {
                cli_dbgmsg(MODULE "infloop detected, skipping character\n");
                scanner->pos++;
        }
        scanner->lastpos = scanner->pos;
        scanner->last_state = scanner->state;

        while(scanner->pos < scanner->insize) {
                switch(scanner->state) {
                        case Initial:
                                textbuf_clean(&scanner->buf);
                                cClass = ctype[in[scanner->pos++]];
                                switch(cClass) {
                                        case Whitespace:
                                                /* skipped */
                                                continue;
                                        case Slash:
                                                /* a slash may open a comment */
                                                if(scanner->pos < insize) {
                                                        next = in[scanner->pos];
                                                        if(next == '*') {
                                                                scanner->pos++;
                                                                scanner->state = MultilineComment;
                                                                continue;
                                                        }
                                                        if(next == '/') {
                                                                scanner->pos++;
                                                                scanner->state = SinglelineComment;
                                                                continue;
                                                        }
                                                }
                                                /* otherwise it is an operator */
                                                /* fallthrough */
                                        case Operator:
                                                /* give the character back to the helper */
                                                --scanner->pos;
                                                return parseOperator(lvalp, scanner);
                                        case DQuote:
                                                return parseDQString(lvalp, scanner);
                                        case SQuote:
                                                return parseSQString(lvalp, scanner);
                                        case Digit:
                                                --scanner->pos;
                                                return parseNumber(lvalp, scanner);
                                        case IdStart:
                                                --scanner->pos;
                                                return parseId(lvalp,scanner);
                                        CASE_SPECIAL_CHAR(BracketOpen, "[");
                                        CASE_SPECIAL_CHAR(BracketClose, "]");
                                        CASE_SPECIAL_CHAR(Comma, ",");
                                        CASE_SPECIAL_CHAR(CurlyOpen, "{");
                                        CASE_SPECIAL_CHAR(CurlyClose, "}");
                                        CASE_SPECIAL_CHAR(ParOpen, "(");
                                        CASE_SPECIAL_CHAR(ParClose, ")");
                                        CASE_SPECIAL_CHAR(Dot, ".");
                                        CASE_SPECIAL_CHAR(SemiColon, ";");
                                        case Nop:
                                               continue;
                                }
                                break;

                        /* states below resume a token started by an earlier call */
                        case DoubleQString:
                                return parseString(lvalp, scanner, '"', DoubleQString);
                        case SingleQString:
                                return parseString(lvalp, scanner, '\'', SingleQString);
                        case Identifier:
                                return parseId(lvalp, scanner);

                        case MultilineComment:
                                /* look for the closing star-slash pair; the
                                 * position ends up just past it, or at/after
                                 * the end of input if it is never found */
                                while(scanner->pos+1 < scanner->insize) {
                                        const int closes = in[scanner->pos] == '*' &&
                                                           in[scanner->pos+1] == '/';

                                        scanner->pos++;
                                        if(closes) {
                                                scanner->state = Initial;
                                                break;
                                        }
                                }
                                scanner->pos++;
                                break;

                        case Number:
                                return parseNumber(lvalp, scanner);

                        case SinglelineComment:
                                /* htmlnorm converts \n to space, so a space
                                 * ends the comment as well; the terminating
                                 * character itself is not consumed here */
                                while(scanner->pos < scanner->insize &&
                                      in[scanner->pos] != '\n' &&
                                      in[scanner->pos] != ' ')
                                        scanner->pos++;
                                scanner->state = Initial;
                                break;
                }
        }

        /* ran out of input */
        return 0;
}