2
* This is a parser module for RFC3164(legacy syslog)-formatted messages.
4
* NOTE: read comments in module-template.h to understand how this file
7
* File begun on 2009-11-04 by RGerhards
9
* Copyright 2007, 2009 Rainer Gerhards and Adiscon GmbH.
11
* This file is part of rsyslog.
13
* Rsyslog is free software: you can redistribute it and/or modify
14
* it under the terms of the GNU General Public License as published by
15
* the Free Software Foundation, either version 3 of the License, or
16
* (at your option) any later version.
18
* Rsyslog is distributed in the hope that it will be useful,
19
* but WITHOUT ANY WARRANTY; without even the implied warranty of
20
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
* GNU General Public License for more details.
23
* You should have received a copy of the GNU General Public License
24
* along with Rsyslog. If not, see <http://www.gnu.org/licenses/>.
26
* A copy of the GPL can be found in the file "COPYING" in this distribution.
37
#include "syslogd-types.h"
40
#include "module-template.h"
45
#include "unicode-helper.h"
49
PARSER_NAME("contrib.rfc3164sd")
51
/* internal structures
57
DEFobjCurrIf(datetime)
61
static int bParseHOSTNAMEandTAG; /* cache for the equally-named global param - performance enhancement */
64
BEGINisCompatibleWithFeature
65
CODESTARTisCompatibleWithFeature
66
if(eFeat == sFEATUREAutomaticSanitazion)
68
if(eFeat == sFEATUREAutomaticPRIParsing)
70
ENDisCompatibleWithFeature
72
/* Helper to parseRFCSyslogMsg. This function parses the structured
73
* data field of a message. It does NOT parse inside structured data,
74
* just gets the field as whole. Parsing the single entities is left
75
* to other functions. The parse pointer is advanced
76
* to after the terminating SP. The caller must ensure that the
77
* provided buffer is large enough to hold the to-be-extracted value.
78
* Returns 0 if everything is fine or 1 if either the field is not
79
* SP-terminated or any other error occurs. -- rger, 2005-11-24
80
* The function now receives the size of the string and makes sure
81
* that it does not process more than that. The *pLenStr counter is
82
* updated on exit. -- rgerhards, 2009-09-23
84
static int parseRFCStructuredData(uchar **pp2parse, uchar *pResult, int *pLenStr)
91
assert(pp2parse != NULL);
92
assert(*pp2parse != NULL);
93
assert(pResult != NULL);
98
/* this is the actual parsing loop
99
* Remember: structured data starts with [ and includes any characters
100
* until the first ] followed by a SP. There may be spaces inside
101
* structured data. There may also be \] inside the structured data, which
102
* do NOT terminate an element.
106
while(lenStr > 0 && *p2parse == ' ') {
107
++p2parse; /* eat SP, but only if not at end of string */
111
if(lenStr == 0 || *p2parse != '[')
112
return 1; /* this is NOT structured data! */
114
if(*p2parse == '-') { /* empty structured data? */
121
/* we now need to check if we have only structured data */
122
if(lenStr > 0 && *p2parse == ']') {
123
*pResult++ = *p2parse;
128
iRet = 1; /* this is not valid! */
131
} else if(*p2parse == '\\' && *(p2parse+1) == ']') {
132
/* this is escaped, need to copy both */
133
*pResult++ = *p2parse++;
134
*pResult++ = *p2parse++;
136
} else if(*p2parse == ']' && *(p2parse+1) == ' ') {
137
/* found end, just need to copy the ] and eat the SP */
138
*pResult++ = *p2parse;
143
*pResult++ = *p2parse++;
149
if(lenStr > 0 && *p2parse == ' ') {
150
++p2parse; /* eat SP, but only if not at end of string */
153
iRet = 1; /* there MUST be an SP! */
157
/* set the new parse pointer */
163
/* parse a legacy-formatted syslog message.
168
int bTAGCharDetected;
169
int i; /* general index for parsing */
170
uchar bufParseTAG[CONF_TAG_MAXSIZE];
171
uchar bufParseHOSTNAME[CONF_HOSTNAME_MAXSIZE];
174
dbgprintf("Message will now be parsed by the legacy syslog parser with structured-data support.\n");
175
assert(pMsg != NULL);
176
assert(pMsg->pszRawMsg != NULL);
177
lenMsg = pMsg->iLenRawMsg - pMsg->offAfterPRI; /* note: offAfterPRI is already the number of PRI chars (do not add one!) */
178
p2parse = pMsg->pszRawMsg + pMsg->offAfterPRI; /* point to start of text, after PRI */
179
setProtocolVersion(pMsg, 0);
181
/* Check to see if msg contains a timestamp. We start by assuming
182
* that the message timestamp is the time of reception (which we
183
* generated ourselves) and then try to actually find one inside the
184
* message. There we go from high to low precision and are done
185
* when we find a matching one. -- rgerhards, 2008-09-16
187
if(datetime.ParseTIMESTAMP3339(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) {
188
/* we are done - parse pointer is moved by ParseTIMESTAMP3339 */;
189
} else if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) {
190
/* we are done - parse pointer is moved by ParseTIMESTAMP3164 */;
191
} else if(*p2parse == ' ' && lenMsg > 1) { /* try to see if it is slighly malformed - HP procurve seems to do that sometimes */
192
++p2parse; /* move over space */
194
if(datetime.ParseTIMESTAMP3164(&(pMsg->tTIMESTAMP), &p2parse, &lenMsg) == RS_RET_OK) {
195
/* indeed, we got it! */
196
/* we are done - parse pointer is moved by ParseTIMESTAMP3164 */;
197
} else {/* parse pointer needs to be restored, as we moved it off-by-one
205
if(pMsg->msgFlags & IGNDATE) {
206
/* we need to ignore the msg data, so simply copy over reception date */
207
memcpy(&pMsg->tTIMESTAMP, &pMsg->tRcvdAt, sizeof(struct syslogTime));
210
/* rgerhards, 2006-03-13: next, we parse the hostname and tag. But we
211
* do this only when the user has not forbidden this. I now introduce some
212
* code that allows a user to configure rsyslogd to treat the rest of the
213
* message as MSG part completely. In this case, the hostname will be the
214
* machine that we received the message from and the tag will be empty. This
215
* is meant to be an interim solution, but for now it is in the code.
217
if(bParseHOSTNAMEandTAG && !(pMsg->msgFlags & INTERNAL_MSG)) {
218
/* parse HOSTNAME - but only if this is network-received!
219
* rger, 2005-11-14: we still have a problem with BSD messages. These messages
220
* do NOT include a host name. In most cases, this leads to the TAG to be treated
221
* as hostname and the first word of the message as the TAG. Clearly, this is not
222
* of advantage ;) I think I have now found a way to handle this situation: there
223
* are certain characters which are frequently used in TAG (e.g. ':'), which are
224
* *invalid* in host names. So while parsing the hostname, I check for these characters.
225
* If I find them, I set a simple flag but continue. After parsing, I check the flag.
226
* If it was set, then we most probably do not have a hostname but a TAG. Thus, I change
227
* the fields. I think this logic shall work with any type of syslog message.
228
* rgerhards, 2009-06-23: and I now have extended this logic to every character
229
* that is not valid in a hostname.
231
bTAGCharDetected = 0;
232
if(lenMsg > 0 && pMsg->msgFlags & PARSE_HOSTNAME) {
234
while(i < lenMsg && (isalnum(p2parse[i]) || p2parse[i] == '.' || p2parse[i] == '.'
235
|| p2parse[i] == '_' || p2parse[i] == '-') && i < (CONF_HOSTNAME_MAXSIZE - 1)) {
236
bufParseHOSTNAME[i] = p2parse[i];
241
/* we have a message that is empty immediately after the hostname,
242
* but the hostname thus is valid! -- rgerhards, 2010-02-22
246
bufParseHOSTNAME[i] = '\0';
247
MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i);
248
} else if(i > 0 && p2parse[i] == ' ' && isalnum(p2parse[i-1])) {
249
/* we got a hostname! */
250
p2parse += i + 1; /* "eat" it (including SP delimiter) */
252
bufParseHOSTNAME[i] = '\0';
253
MsgSetHOSTNAME(pMsg, bufParseHOSTNAME, i);
257
/* now parse TAG - that should be present in message from all sources.
258
* This code is somewhat not compliant with RFC 3164. As of 3164,
259
* the TAG field is ended by any non-alphanumeric character. In
260
* practice, however, the TAG often contains dashes and other things,
261
* which would end the TAG. So it is not desirable. As such, we only
262
* accept colon and SP to be terminators. Even there is a slight difference:
263
* a colon is PART of the TAG, while a SP is NOT part of the tag
264
* (it is CONTENT). Starting 2008-04-04, we have removed the 32 character
265
* size limit (from RFC3164) on the tag. This had bad effects on existing
266
* environments, as sysklogd didn't obey it either (probably another bug
267
* in RFC3164...). We now receive the full size, but will modify the
268
* outputs so that only 32 characters max are used by default.
271
while(lenMsg > 0 && *p2parse != ':' && *p2parse != ' ' && i < CONF_TAG_MAXSIZE) {
272
bufParseTAG[i++] = *p2parse++;
275
if(lenMsg > 0 && *p2parse == ':') {
278
bufParseTAG[i++] = ':';
281
/* no TAG can only be detected if the message immediately ends, in which case an empty TAG
282
* is considered OK. So we do not need to check for empty TAG. -- rgerhards, 2009-06-23
284
bufParseTAG[i] = '\0'; /* terminate string */
285
MsgSetTAG(pMsg, bufParseTAG, i);
286
} else {/* we enter this code area when the user has instructed rsyslog NOT
287
* to parse HOSTNAME and TAG - rgerhards, 2006-03-13
289
if(!(pMsg->msgFlags & INTERNAL_MSG)) {
290
DBGPRINTF("HOSTNAME and TAG not parsed by user configuraton.\n");
294
CHKmalloc(pBuf = MALLOC(sizeof(uchar) * (lenMsg + 1)));
296
/* STRUCTURED-DATA */
297
if (parseRFCStructuredData(&p2parse, pBuf, &lenMsg) == 0)
298
MsgSetStructuredData(pMsg, (char*)pBuf);
300
MsgSetStructuredData(pMsg, "-");
302
/* The rest is the actual MSG */
303
MsgSetMSGoffs(pMsg, p2parse - pMsg->pszRawMsg);
313
/* release what we no longer need */
314
objRelease(errmsg, CORE_COMPONENT);
315
objRelease(glbl, CORE_COMPONENT);
316
objRelease(parser, CORE_COMPONENT);
317
objRelease(datetime, CORE_COMPONENT);
323
CODEqueryEtryPt_STD_PMOD_QUERIES
324
CODEqueryEtryPt_IsCompatibleWithFeature_IF_OMOD_QUERIES
330
*ipIFVersProvided = CURR_MOD_IF_VERSION; /* we only support the current interface specification */
331
CODEmodInit_QueryRegCFSLineHdlr
332
CHKiRet(objUse(glbl, CORE_COMPONENT));
333
CHKiRet(objUse(errmsg, CORE_COMPONENT));
334
CHKiRet(objUse(parser, CORE_COMPONENT));
335
CHKiRet(objUse(datetime, CORE_COMPONENT));
337
dbgprintf("rfc3164sd parser init called\n");
338
bParseHOSTNAMEandTAG = glbl.GetParseHOSTNAMEandTAG(); /* cache value, is set only during rsyslogd option processing */