1
/* ========================================================================== **
4
* Copyright (C) 1998 by Christopher R. Hertel
6
* Email: crh@ubiqx.mn.org
8
* -------------------------------------------------------------------------- **
9
* This module is a very simple parser for Samba debug log files.
10
* -------------------------------------------------------------------------- **
12
* This library is free software; you can redistribute it and/or
13
* modify it under the terms of the GNU Lesser General Public
14
* License as published by the Free Software Foundation; either
15
* version 3 of the License, or (at your option) any later version.
17
* This library is distributed in the hope that it will be useful,
18
* but WITHOUT ANY WARRANTY; without even the implied warranty of
19
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
* Library General Public License for more details.
22
* You should have received a copy of the GNU Lesser General Public
23
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
25
* -------------------------------------------------------------------------- **
26
* The important function in this module is dbg_char2token(). The rest is
27
* basically fluff. (Potentially useful fluff, but still fluff.)
28
* ========================================================================== **
31
#include "debugparse.h"
33
/* -------------------------------------------------------------------------- **
36
* DBG_BSIZE - This internal constant is used only by dbg_test(). It is the
37
* size of the read buffer. I've tested the function using a
38
* DBG_BSIZE value of 2.
43
/* -------------------------------------------------------------------------- **
47
const char *dbg_token2string( dbg_Token tok )
48
/* ------------------------------------------------------------------------ **
49
* Given a token, return a string describing the token.
51
* Input: tok - One of the set of dbg_Tokens defined in debugparse.h.
53
* Output: A string identifying the token. This is useful for debugging,
56
* Note: If the token is not known, this function will return the string "<unknown>".
59
* ------------------------------------------------------------------------ **
71
return( "time stamp" );
75
return( "source file" );
79
return( "line number" );
85
return( "<unknown>" );
86
} /* dbg_token2string */
88
dbg_Token dbg_char2token( dbg_Token *state, int c )
89
/* ------------------------------------------------------------------------ **
90
* Parse input one character at a time.
92
* Input: state - A pointer to a token variable. This is used to
93
* maintain the parser state between calls. For
94
* each input stream, you should set up a separate
95
* state variable and initialize it to dbg_null.
96
* Pass a pointer to it into this function with each
97
* character in the input stream. See dbg_test() for an example.
99
* c - The "current" character in the input stream.
102
* The token value will change when delimiters are found,
103
* which indicate a transition between syntactical objects.
104
* Possible return values are:
106
* dbg_null - The input character was an end-of-line.
107
* This resets the parser to its initial state
108
* in preparation for parsing the next line.
109
* dbg_eof - Same as dbg_null, except that the character
110
* was an end-of-file.
111
* dbg_ignore - Returned for whitespace and delimiters.
112
* These lexical tokens are only of interest
114
* dbg_header - Indicates the start of a header line. The
115
* input character was '[' and was the first on the line.
117
* dbg_timestamp - Indicates that the input character was part
118
* of a header timestamp.
119
* dbg_level - Indicates that the input character was part
120
* of the debug-level value in the header.
121
* dbg_sourcefile - Indicates that the input character was part
122
* of the sourcefile name in the header.
123
* dbg_function - Indicates that the input character was part
124
* of the function name in the header.
125
* dbg_lineno - Indicates that the input character was part
126
* of the DEBUG call line number in the header.
127
* dbg_message - Indicates that the input character was part
128
* of the DEBUG message text.
130
* ------------------------------------------------------------------------ **
133
/* The terminating characters that we see will greatly depend upon
134
* how they are read. For example, if gets() is used instead of
135
* fgets(), then we will not see newline characters. A lot also
136
* depends on the calling function, which may handle terminators
139
* '\n', '\0', and EOF are all considered line terminators. The
140
* dbg_eof token is sent back if an EOF is encountered.
142
* Warning: only allow the '\0' character to be sent if you are
143
* using gets() to read whole lines (thus replacing '\n'
144
* with '\0'). Sending '\0' at the wrong time will mess
150
*state = dbg_null; /* Set state to null (initial state) so */
151
return( dbg_eof ); /* that we can restart with new input. */
154
*state = dbg_null; /* A newline or eoln resets to the null state. */
158
/* When within the body of the message, only a line terminator
159
* can cause a change of state. We've already checked for line
160
* terminators, so if the current state is dbg_message, simply
161
* return that as our current token.
163
if( dbg_message == *state )
164
return( dbg_message );
166
/* If we are at the start of a new line, and the input character
167
* is an opening bracket, then the line is a header line, otherwise
168
* it's a message body line.
170
if( dbg_null == *state )
174
*state = dbg_timestamp;
175
return( dbg_header );
177
*state = dbg_message;
178
return( dbg_message );
181
/* We've taken care of terminators, text blocks and new lines.
182
* The remaining possibilities are all within the header line
186
/* Within the header line, whitespace can be ignored *except*
187
* within the timestamp.
191
/* Fudge. The timestamp may contain space characters. */
192
if( (' ' == c) && (dbg_timestamp == *state) )
193
return( dbg_timestamp );
194
/* Otherwise, ignore whitespace. */
195
return( dbg_ignore );
198
/* Okay, at this point we know we're somewhere in the header.
199
* Valid header *states* are: dbg_timestamp, dbg_level,
200
* dbg_sourcefile, dbg_function, and dbg_lineno.
205
if( dbg_timestamp == *state )
208
return( dbg_ignore );
212
if( dbg_level == *state )
214
*state = dbg_sourcefile;
215
return( dbg_ignore );
219
if( dbg_sourcefile == *state )
221
*state = dbg_function;
222
return( dbg_ignore );
226
if( dbg_function == *state )
229
return( dbg_ignore );
233
if( dbg_lineno == *state )
236
return( dbg_ignore );
241
/* If the previous block did not result in a state change, then
242
* return the current state as the current token.
245
} /* dbg_char2token */
247
void dbg_test( void );
248
void dbg_test( void )
249
/* ------------------------------------------------------------------------ **
250
* Simple test function.
254
* Notes: This function was used to test dbg_char2token(). It reads a
255
* Samba log file from stdin and prints parsing info to stdout.
256
* It also serves as a simple example.
258
* ------------------------------------------------------------------------ **
261
char bufr[DBG_BSIZE];
264
dbg_Token old = dbg_null,
268
while( fgets( bufr, DBG_BSIZE, stdin ) )
270
for( i = 0; bufr[i]; i++ )
273
newtok = dbg_char2token( &state, bufr[i] );
278
(void)putchar( '\n' );
287
(void)printf( "\n[%05d]%12s: ", linecount, dbg_token2string(newtok) );
288
(void)putchar( bufr[i] );
292
(void)putchar( '\n' );
296
/* -------------------------------------------------------------------------- **
297
* This simple main line can be uncommented and used to test the parser.
308
/* ========================================================================== */