1
/* cfgtok.c - helper class to tokenize an input stream - which surprisingly
2
* currently does not work with streams but with strings. But that will
3
* probably change over time ;) This class was originally written to support
4
* the expression module but may evolve when (if) the expression module is
5
* expanded (or aggregated) by a full-fledged ctoken based config parser.
6
* Obviously, this class is used together with config files and not any other
9
* Module begun 2008-02-19 by Rainer Gerhards
11
* Copyright (C) 2008 by Rainer Gerhards and Adiscon GmbH.
13
* This file is part of the rsyslog runtime library.
15
* The rsyslog runtime library is free software: you can redistribute it and/or modify
16
* it under the terms of the GNU Lesser General Public License as published by
17
* the Free Software Foundation, either version 3 of the License, or
18
* (at your option) any later version.
20
* The rsyslog runtime library is distributed in the hope that it will be useful,
21
* but WITHOUT ANY WARRANTY; without even the implied warranty of
22
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
* GNU Lesser General Public License for more details.
25
* You should have received a copy of the GNU Lesser General Public License
26
* along with the rsyslog runtime library. If not, see <http://www.gnu.org/licenses/>.
28
* A copy of the GPL can be found in the file "COPYING" in this distribution.
29
* A copy of the LGPL can be found in the file "COPYING.LESSER" in this distribution.
44
DEFobjCurrIf(ctok_token)
48
/* Standard-Constructor
50
BEGINobjConstruct(ctok) /* be sure to specify the object type also in END macro! */
54
/* ConstructionFinalizer
55
* rgerhards, 2008-01-09
57
rsRetVal ctokConstructFinalize(ctok_t __attribute__((unused)) *pThis)
64
/* destructor for the ctok object */
65
BEGINobjDestruct(ctok) /* be sure to specify the object type also in END and CODESTART macros! */
66
CODESTARTobjDestruct(ctok)
67
/* ... then free resources */
71
/* unget character from input stream. At most one character can be ungotten.
72
* This function is only permitted to be called after at least one character
73
* has been read from the stream. Right now, we handle the situation simply by
74
* moving the string "stream" pointer one position backwards. If we work with
75
* real streams (some time), the strm object will handle the functionality
76
* itself. -- rgerhards, 2008-02-19
79
ctokUngetCharFromStream(ctok_t *pThis, uchar __attribute__((unused)) c)
83
ISOBJ_TYPE_assert(pThis, ctok);
90
/* get the next character from the input "stream" (currently just an in-memory
91
* string...) -- rgerhards, 2008-02-19
94
ctokGetCharFromStream(ctok_t *pThis, uchar *pc)
98
ISOBJ_TYPE_assert(pThis, ctok);
101
/* end of string or begin of comment terminates the "stream" */
102
if(*pThis->pp == '\0' || *pThis->pp == '#') {
103
ABORT_FINALIZE(RS_RET_EOS);
114
/* skip whitespace in the input "stream".
115
* rgerhards, 2008-02-19
118
ctokSkipWhitespaceFromStream(ctok_t *pThis)
123
ISOBJ_TYPE_assert(pThis, ctok);
125
CHKiRet(ctokGetCharFromStream(pThis, &c));
127
CHKiRet(ctokGetCharFromStream(pThis, &c));
130
/* we must unget the one non-whitespace we found */
131
CHKiRet(ctokUngetCharFromStream(pThis, c));
133
dbgprintf("skipped whitepsace, stream now '%s'\n", pThis->pp);
139
/* get the next word from the input "stream" (currently just an in-memory
140
* string...). A word is anything from the current location until the
141
* first non-alphanumeric character. If the word is longer
142
* than the provided memory buffer, parsing terminates when buffer length
143
* has been reached. A buffer of 128 bytes or more should always be by
144
* far sufficient. -- rgerhards, 2008-02-19
147
ctokGetWordFromStream(ctok_t *pThis, uchar *pWordBuf, size_t lenWordBuf)
152
ISOBJ_TYPE_assert(pThis, ctok);
153
ASSERT(pWordBuf != NULL);
154
ASSERT(lenWordBuf > 0);
156
CHKiRet(ctokSkipWhitespaceFromStream(pThis));
158
CHKiRet(ctokGetCharFromStream(pThis, &c));
159
while((isalnum(c) || c == '_' || c == '-') && lenWordBuf > 1) {
162
CHKiRet(ctokGetCharFromStream(pThis, &c));
164
*pWordBuf = '\0'; /* there is always space for this - see while() */
166
/* push back the char that we have read too much */
167
CHKiRet(ctokUngetCharFromStream(pThis, c));
174
/* read in a constant number
175
* This is the "number" ABNF element
176
* rgerhards, 2008-02-19
179
ctokGetNumber(ctok_t *pThis, ctok_token_t *pToken)
182
number_t n; /* the parsed number */
187
ISOBJ_TYPE_assert(pThis, ctok);
188
ASSERT(pToken != NULL);
190
pToken->tok = ctok_NUMBER;
192
CHKiRet(ctokGetCharFromStream(pThis, &c));
193
if(c == '0') { /* octal? */
194
CHKiRet(ctokGetCharFromStream(pThis, &c));
195
if(c == 'x') { /* nope, hex! */
196
CHKiRet(ctokGetCharFromStream(pThis, &c));
207
/* this loop is quite simple: the number ends at the first character that is neither a digit nor a lowercase 'a'..'f'. */
208
while(isdigit(c) || (c >= 'a' && c <= 'f')) {
217
ABORT_FINALIZE(RS_RET_INVALID_OCTAL_DIGIT);
219
ABORT_FINALIZE(RS_RET_INVALID_HEX_DIGIT);
222
/* we now have the next value and know it is right */
223
n = n * iBase + valC;
224
CHKiRet(ctokGetCharFromStream(pThis, &c));
228
/* we need to unget the character that made the loop terminate */
229
CHKiRet(ctokUngetCharFromStream(pThis, c));
231
CHKiRet(var.SetNumber(pToken->pVar, n));
238
/* read in a variable
239
* This covers both msgvar and sysvar from the ABNF.
240
* rgerhards, 2008-02-19
243
ctokGetVar(ctok_t *pThis, ctok_token_t *pToken)
249
ISOBJ_TYPE_assert(pThis, ctok);
250
ASSERT(pToken != NULL);
252
CHKiRet(ctokGetCharFromStream(pThis, &c));
254
if(c == '$') { /* second dollar, we have a system variable */
255
pToken->tok = ctok_SYSVAR;
256
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* "eat" it... */
258
pToken->tok = ctok_MSGVAR;
261
CHKiRet(rsCStrConstruct(&pstrVal));
262
/* this loop is quite simple, a variable name is terminated by whitespace. */
264
CHKiRet(rsCStrAppendChar(pstrVal, tolower(c)));
265
CHKiRet(ctokGetCharFromStream(pThis, &c));
267
CHKiRet(rsCStrFinish(pStrB));
269
CHKiRet(var.SetString(pToken->pVar, pstrVal));
273
if(iRet != RS_RET_OK) {
274
if(pstrVal != NULL) {
275
rsCStrDestruct(&pstrVal);
283
/* read in a simple string (simpstr in ABNF)
284
* rgerhards, 2008-02-19
287
ctokGetSimpStr(ctok_t *pThis, ctok_token_t *pToken)
294
ISOBJ_TYPE_assert(pThis, ctok);
295
ASSERT(pToken != NULL);
297
pToken->tok = ctok_SIMPSTR;
299
CHKiRet(rsCStrConstruct(&pstrVal));
300
CHKiRet(ctokGetCharFromStream(pThis, &c));
301
/* while we are in escape mode (had a backslash), no sequence
302
* terminates the loop. If outside, it is terminated by a single quote.
304
while(bInEsc || c != '\'') {
306
CHKiRet(rsCStrAppendChar(pstrVal, c));
312
CHKiRet(rsCStrAppendChar(pstrVal, c));
315
CHKiRet(ctokGetCharFromStream(pThis, &c));
317
CHKiRet(rsCStrFinish(pStrB));
319
CHKiRet(var.SetString(pToken->pVar, pstrVal));
323
if(iRet != RS_RET_OK) {
324
if(pstrVal != NULL) {
325
rsCStrDestruct(&pstrVal);
333
/* Unget a token. The token ungotten will be returned the next time
334
* ctokGetToken() is called. Only one token can be ungotten at a time.
335
* If a second token is ungotten, the first is lost. This is considered
336
* a programming error.
337
* rgerhards, 2008-02-20
340
ctokUngetToken(ctok_t *pThis, ctok_token_t *pToken)
344
ISOBJ_TYPE_assert(pThis, ctok);
345
ASSERT(pToken != NULL);
346
ASSERT(pThis->pUngotToken == NULL);
348
pThis->pUngotToken = pToken;
354
/* skip an inline comment (just like a C-comment)
355
* rgerhards, 2008-02-20
358
ctokSkipInlineComment(ctok_t *pThis)
362
int bHadAsterisk = 0;
364
ISOBJ_TYPE_assert(pThis, ctok);
366
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */
367
while(!(bHadAsterisk && c == '/')) {
368
bHadAsterisk = (c == '*') ? 1 : 0;
369
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read next */
378
/* Get the *next* token from the input stream. This parses the next token and
379
* ignores any whitespace in between. End of stream is communicated via iRet.
380
* The returned token must either be destructed by the caller OR be passed
381
* back to ctokUngetToken().
382
* rgerhards, 2008-02-19
385
ctokGetToken(ctok_t *pThis, ctok_token_t **ppToken)
388
ctok_token_t *pToken;
391
int bRetry = 0; /* retry parse? Only needed for inline comments... */
393
ISOBJ_TYPE_assert(pThis, ctok);
394
ASSERT(ppToken != NULL);
396
/* first check if we have an ungotten token and, if so, provide that
397
* one back (without any parsing). -- rgerhards, 2008-02-20
399
if(pThis->pUngotToken != NULL) {
400
*ppToken = pThis->pUngotToken;
401
pThis->pUngotToken = NULL;
405
/* setup the stage - create our token */
406
CHKiRet(ctok_token.Construct(&pToken));
407
CHKiRet(ctok_token.ConstructFinalize(pToken));
409
/* find the next token. We may loop when we have inline comments */
412
CHKiRet(ctokSkipWhitespaceFromStream(pThis));
413
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */
416
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */
417
pToken->tok = (c == '=')? ctok_CMP_EQ : ctok_INVALID;
420
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */
421
pToken->tok = (c == '=')? ctok_CMP_NEQ : ctok_INVALID;
423
case '<': /* <, <=, <> */
424
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */
426
pToken->tok = ctok_CMP_LTEQ;
427
} else if(c == '>') {
428
pToken->tok = ctok_CMP_NEQ;
430
pToken->tok = ctok_CMP_LT;
433
case '>': /* >, >= */
434
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */
436
pToken->tok = ctok_CMP_GTEQ;
438
pToken->tok = ctok_CMP_GT;
442
pToken->tok = ctok_PLUS;
445
pToken->tok = ctok_MINUS;
448
pToken->tok = ctok_TIMES;
450
case '/': /* /, /.* ... *./ (comments, mungled here for obvious reasons...) */
451
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */
453
/* we have a comment and need to skip it */
454
ctokSkipInlineComment(pThis);
457
CHKiRet(ctokUngetCharFromStream(pThis, c)); /* put back, not processed */
459
pToken->tok = ctok_DIV;
462
pToken->tok = ctok_MOD;
465
pToken->tok = ctok_LPAREN;
468
pToken->tok = ctok_RPAREN;
471
pToken->tok = ctok_COMMA;
474
pToken->tok = ctok_STRADD;
477
CHKiRet(ctokGetVar(pThis, pToken));
479
case '\'': /* simple string, this is somewhat more elaborate */
480
CHKiRet(ctokGetSimpStr(pThis, pToken));
483
/* TODO: template string parser */
484
ABORT_FINALIZE(RS_RET_NOT_IMPLEMENTED);
487
CHKiRet(ctokUngetCharFromStream(pThis, c)); /* push back, we need it in any case */
489
CHKiRet(ctokGetNumber(pThis, pToken));
490
} else { /* now we check if we have a multi-char sequence */
491
CHKiRet(ctokGetWordFromStream(pThis, szWord, sizeof(szWord)/sizeof(uchar)));
492
if(!strcasecmp((char*)szWord, "and")) {
493
pToken->tok = ctok_AND;
494
} else if(!strcasecmp((char*)szWord, "or")) {
495
pToken->tok = ctok_OR;
496
} else if(!strcasecmp((char*)szWord, "not")) {
497
pToken->tok = ctok_NOT;
498
} else if(!strcasecmp((char*)szWord, "contains")) {
499
pToken->tok = ctok_CMP_CONTAINS;
500
} else if(!strcasecmp((char*)szWord, "contains_i")) {
501
pToken->tok = ctok_CMP_CONTAINSI;
502
} else if(!strcasecmp((char*)szWord, "startswith")) {
503
pToken->tok = ctok_CMP_STARTSWITH;
504
} else if(!strcasecmp((char*)szWord, "startswith_i")) {
505
pToken->tok = ctok_CMP_STARTSWITHI;
506
} else if(!strcasecmp((char*)szWord, "then")) {
507
pToken->tok = ctok_THEN;
509
/* finally, we check if it is a function */
510
CHKiRet(ctokGetCharFromStream(pThis, &c)); /* read a charater */
512
/* push c back, higher level parser needs it */
513
CHKiRet(ctokUngetCharFromStream(pThis, c));
514
pToken->tok = ctok_FUNCTION;
515
// TODO: fill function name
516
} else { /* give up... */
517
pToken->tok = ctok_INVALID;
523
} while(bRetry); /* warning: do ... while()! */
526
dbgoprint((obj_t*) pToken, "token: %d\n", pToken->tok);
529
if(iRet != RS_RET_OK) {
531
ctok_token.Destruct(&pToken);
538
/* property set methods */
539
/* simple ones first */
540
DEFpropSetMeth(ctok, pp, uchar*)
542
/* return the current position of pp - most important as currently we do only
543
* partial parsing, so the rest must know where to start from...
544
* rgerhards, 2008-02-19
547
ctokGetpp(ctok_t *pThis, uchar **pp)
556
/* queryInterface function
557
* rgerhards, 2008-02-21
559
BEGINobjQueryInterface(ctok)
560
CODESTARTobjQueryInterface(ctok)
561
if(pIf->ifVersion != ctokCURR_IF_VERSION) { /* check for current version, increment on each change */
562
ABORT_FINALIZE(RS_RET_INTERFACE_NOT_SUPPORTED);
565
/* ok, we have the right interface, so let's fill it
566
* Please note that we may also do some backwards-compatibility
567
* work here (if we can support an older interface version - that,
568
* of course, also affects the "if" above).
570
//xxxpIf->oID = OBJctok;
572
pIf->Construct = ctokConstruct;
573
pIf->ConstructFinalize = ctokConstructFinalize;
574
pIf->Destruct = ctokDestruct;
575
pIf->Getpp = ctokGetpp;
576
pIf->GetToken = ctokGetToken;
577
pIf->UngetToken = ctokUngetToken;
578
pIf->Setpp = ctokSetpp;
580
ENDobjQueryInterface(ctok)
584
BEGINObjClassInit(ctok, 1, OBJ_IS_CORE_MODULE) /* class, version */
585
/* request objects we use */
586
CHKiRet(objUse(ctok_token, CORE_COMPONENT));
587
CHKiRet(objUse(var, CORE_COMPONENT));
589
OBJSetMethodHandler(objMethod_CONSTRUCTION_FINALIZER, ctokConstructFinalize);
590
ENDObjClassInit(ctok)