2
*******************************************************************************
4
* Copyright (C) 1998-2003, International Business Machines
5
* Corporation and others. All Rights Reserved.
7
*******************************************************************************
11
* Modification History:
13
* Date Name Description
14
* 05/26/99 stephen Creation.
15
* 5/10/01 Ram removed ustdio dependency
16
*******************************************************************************
21
#include "unicode/ustring.h"
23
/* U+007B LEFT CURLY BRACKET */
#define OPENBRACE 0x007B
24
/* U+007D RIGHT CURLY BRACKET */
#define CLOSEBRACE 0x007D
29
/* U+002A ASTERISK */
#define ASTERISK 0x002A
36
/* File-wide line counter. getNextToken copies it into *linenumber and it is
 * passed to warning() when a diagnostic is issued. A comment in
 * seekUntilEndOfComment says it is incremented there (the statement itself is
 * not visible in this view). */
static int32_t lineCount;
39
/* Forward declarations of the file-local helpers.
 * NOTE(review): the getStringToken prototype below is truncated in this view. */
static enum ETokenType getStringToken(UCHARBUF *buf,
41
struct UString *token,
44
static UChar32 getNextChar (UCHARBUF *buf, UBool skipwhite, struct UString *token, UErrorCode *status);
45
static void seekUntilNewline (UCHARBUF *buf, struct UString *token, UErrorCode *status);
46
static void seekUntilEndOfComment (UCHARBUF *buf, struct UString *token, UErrorCode *status);
47
static UBool isWhitespace (UChar32 c);
48
static UBool isNewline (UChar32 c);
50
/* NOTE(review): the body is not visible in this view; presumably it resets
 * the file-wide lineCount - confirm against the full source. */
void resetLineNumber() {
54
/* Read and return the next token from the stream. If the token is of
55
type eString, fill in the token parameter with the token. If the
56
token is eError, then the status parameter will contain the
57
specific error. This will be eItemNotFound at the end of file,
58
indicating that all tokens have been returned. This method will
59
never return eString twice in a row; instead, multiple adjacent
60
string tokens will be merged into one, with no intervening space. */
62
enum ETokenType getNextToken(UCHARBUF* buf,
63
struct UString *token,
64
uint32_t *linenumber, /* out: linenumber of token */
65
struct UString *comment,
67
enum ETokenType result;
70
if (U_FAILURE(*status)) {
75
/* Fetch the next character with skipwhite set to TRUE. The comment argument
 * is handed to getNextChar as its token parameter, which getNextChar in turn
 * passes to seekUntilEndOfComment. */
c = getNextChar(buf, TRUE, comment, status);
77
if (U_FAILURE(*status)) {
81
/* Report the current value of lineCount to the caller. */
*linenumber = lineCount;
87
/* NOTE(review): the dispatch on c that selects between these returns is not
 * visible in this view. */
return TOK_OPEN_BRACE;
89
return TOK_CLOSE_BRACE;
98
/* c is passed on as the already-read first character of a string token. */
result = getStringToken(buf, c, token, status);
101
/* lineCount is copied out again after the string token has been read. */
*linenumber = lineCount;
105
/* Copy a string token into the given UnicodeString. Upon entry, we
106
have already read the first character of the string token, which is
107
not a whitespace character (but may be a QUOTE or ESCAPE). This
108
function reads all subsequent characters that belong with this
109
string, and copies them into the token parameter. The other
110
important, and slightly convoluted purpose of this function is to
111
merge adjacent strings. It looks forward a bit, and if the next
112
non comment, non whitespace item is a string, it reads it in as
113
well. If two adjacent strings are quoted, they are merged without
114
intervening space. Otherwise a single SPACE character is inserted. */
116
static enum ETokenType getStringToken(UCHARBUF* buf,
118
struct UString *token,
119
UErrorCode *status) {
120
UBool lastStringWasQuoted;
122
UChar target[3] = { '\0' };
123
UChar *pTarget = target;
125
UBool isFollowingCharEscaped=FALSE;
126
UBool isNLUnescaped = FALSE;
129
/* We are guaranteed on entry that initialChar is not a whitespace
130
character. If we are at the EOF, or have some other problem, it
131
doesn't matter; we still want to validly return the initialChar
132
(if nothing else) as a string token. */
/* NOTE(review): many lines of this function (loop headers, else branches,
 * returns, some declarations) are missing from this view. The notes below
 * describe only the statements that are visible. */
134
if (U_FAILURE(*status)) {
139
/* Start with an empty token and no quoted segment seen so far. */
lastStringWasQuoted = FALSE;
141
ustr_setlen(token, 0, status);
143
if (U_FAILURE(*status)) {
149
/* Append a SPACE only when the token already holds text and the previous
 * segment was not quoted. */
if (!lastStringWasQuoted && token->fLength > 0) {
150
ustr_ucat(token, SPACE, status);
152
if (U_FAILURE(*status)) {
157
lastStringWasQuoted = TRUE;
160
/* Read the next raw character from the buffer. */
c = ucbuf_getc(buf,status);
167
/* Unterminated quoted strings */
168
if (U_FAILURE(*status)) {
172
/* QUOTE and ESCAPE are treated specially only when the preceding character
 * did not set isFollowingCharEscaped. */
if (c == QUOTE && !isFollowingCharEscaped) {
176
if (c == ESCAPE && !isFollowingCharEscaped) {
178
c = unescape(buf, status);
183
/* Remember that this CR or LF came out of unescape rather than straight
 * from the input. */
if(c == CR || c == LF){
184
isNLUnescaped = TRUE;
188
if(c==ESCAPE && !isFollowingCharEscaped){
189
isFollowingCharEscaped = TRUE;
191
/* presumably encodes c into the target buffer and sets len to the number of
 * code units written - confirm against the U_APPEND_CHAR32 macro. The len
 * units are then appended to the token. */
U_APPEND_CHAR32(c, pTarget,len);
193
ustr_uscat(token, pTarget,len, status);
194
isFollowingCharEscaped = FALSE;
196
/* A CR or LF that did not come from unescape and does not follow a CR is
 * singled out here; the action taken is not visible in this view. */
if(c == CR || c == LF){
197
if(isNLUnescaped == FALSE && prevC!=CR){
200
isNLUnescaped = FALSE;
204
if (U_FAILURE(*status)) {
210
/* Separate this segment from text already in the token with a SPACE. */
if (token->fLength > 0) {
211
ustr_ucat(token, SPACE, status);
213
if (U_FAILURE(*status)) {
218
/* The previous segment was quoted: emit the mixing warning when warnings
 * are enabled. */
if(lastStringWasQuoted){
219
if(getShowWarning()){
220
warning(lineCount, "Mixing quoted and unquoted strings");
228
lastStringWasQuoted = FALSE;
230
/* if we reach here we are mixing
231
* quoted and unquoted strings
232
* warn in normal mode and error in
238
c = unescape(buf, status);
246
U_APPEND_CHAR32(c, pTarget,len);
248
ustr_uscat(token, pTarget,len, status);
251
if (U_FAILURE(*status)) {
256
/* DON'T skip whitespace */
257
c = getNextChar(buf, FALSE, NULL, status);
261
ucbuf_ungetc(c, buf);
265
if (U_FAILURE(*status)) {
274
ucbuf_ungetc(c, buf);
278
if (isWhitespace(c)) {
284
c = unescape(buf, status);
291
U_APPEND_CHAR32(c, pTarget,len);
293
ustr_uscat(token, pTarget,len, status);
295
if (U_FAILURE(*status)) {
301
/* DO skip whitespace */
302
c = getNextChar(buf, TRUE, NULL, status);
304
if (U_FAILURE(*status)) {
308
/* A structural character (brace, COMMA or COLON) is pushed back onto buf
 * rather than being consumed here. */
if (c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
309
ucbuf_ungetc(c, buf);
315
/* Retrieve the next character. If skipwhite is
316
true, whitespace is skipped as well. */
/* Parameters, as far as this view shows:
 *   buf       - input buffer read with ucbuf_getc and ucbuf_ungetc.
 *   skipwhite - see above (declared in the prototype; its line in this
 *               definition is not visible in this view).
 *   token     - passed to seekUntilEndOfComment on one comment path; callers
 *               in this file pass either a UString or NULL.
 *   status    - ICU error code, checked on entry.
 * NOTE(review): the conditions that choose between the comment-handling calls
 * below are not visible in this view. */
317
static UChar32 getNextChar(UCHARBUF* buf,
319
struct UString *token,
320
UErrorCode *status) {
323
if (U_FAILURE(*status)) {
328
c = ucbuf_getc(buf,status);
334
if (skipwhite && isWhitespace(c)) {
338
/* This also handles the get() failing case */
343
/* Read the character that follows c. */
c = ucbuf_getc(buf,status);
351
/* Consume the rest of the line; NULL is passed as the token argument. */
seekUntilNewline(buf, NULL, status);
355
c2 = ucbuf_getc(buf, status);
357
/* parse multi-line comment and store it in token*/
358
seekUntilEndOfComment(buf, token, status);
360
ucbuf_ungetc(c, buf);
361
/* Same scan as above, but NULL is passed as the token argument. */
seekUntilEndOfComment(buf, NULL, status);
366
ucbuf_ungetc(c, buf);
367
/* If get() failed this is a NOP */
374
/* Read characters from buf until isNewline(c) is true, U_EOF is returned, or
 * *status is no longer exactly U_ZERO_ERROR. The loop is a do/while, so at
 * least one character is read whenever the loop is reached.
 * Each character read is appended to token with ustr_u32cat.
 * NOTE(review): getNextChar calls this with token == NULL; any guard around
 * the append is not visible in this view - confirm against the full source. */
static void seekUntilNewline(UCHARBUF* buf,
375
struct UString *token,
376
UErrorCode *status) {
379
if (U_FAILURE(*status)) {
384
c = ucbuf_getc(buf,status);
385
/* add the char to token */
387
ustr_u32cat(token, c, status);
389
} while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR);
392
/* Scan forward through buf inside a multi-line comment. Each iteration reads
 * a character c and, on a path not fully visible here, a look-ahead character
 * d that is pushed back with ucbuf_ungetc. Characters are appended to token
 * with ustr_u32cat. The loop runs while c is not U_EOF and *status is exactly
 * U_ZERO_ERROR.
 * After the loop the visible code sets *status to U_INVALID_FORMAT_ERROR and
 * reports "unterminated comment detected".
 * NOTE(review): the terminator test, the condition guarding the error report,
 * any NULL check on token, and the declaration of line are not visible in
 * this view. */
static void seekUntilEndOfComment(UCHARBUF *buf,
393
struct UString *token,
394
UErrorCode *status) {
398
if (U_FAILURE(*status)) {
405
c = ucbuf_getc(buf, status);
408
/* Look one character ahead, then push it back. */
d = ucbuf_getc(buf, status);
411
ucbuf_ungetc(d, buf);
416
/* add the char to token */
418
ustr_u32cat(token, c, status);
420
/* increment the lineCount */
423
} while (c != U_EOF && *status == U_ZERO_ERROR);
426
*status = U_INVALID_FORMAT_ERROR;
427
error(line, "unterminated comment detected");
431
/* Decode an escape sequence whose leading ESCAPE character has already been
 * consumed by the caller.
 *   buf    - input buffer; ESCAPE is pushed back onto it before decoding.
 *   status - ICU error code, checked on entry (the failure branch body is not
 *            visible in this view).
 * Returns the value produced by ucbuf_getcx32(buf, status). */
UChar32 unescape(UCHARBUF *buf,
432
UErrorCode *status) {
433
if (U_FAILURE(*status)) {
437
/* We expect to be called after the ESCAPE has been seen, but
438
* ucbuf_getcx32 needs an ESCAPE to do its magic. */
439
ucbuf_ungetc(ESCAPE, buf);
441
return ucbuf_getcx32(buf, status);
444
/* Whitespace test used by getNextChar and getStringToken.
 * NOTE(review): the body is not visible in this view; the comment below lists
 * the characters it is meant to cover - confirm against the full source. */
static UBool isWhitespace(UChar32 c) {
446
/* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
461
static UBool isNewline(UChar32 c) {
463
/* '\n', '\r', 0x2029 */