2
*******************************************************************************
4
* Copyright (C) 1998-2000, International Business Machines
5
* Corporation and others. All Rights Reserved.
7
*******************************************************************************
11
* Modification History:
13
* Date Name Description
14
* 05/26/99 stephen Creation.
15
* 5/10/01 Ram removed ustdio dependency
16
*******************************************************************************
21
#include "unicode/ustring.h"
23
#define OPENBRACE 0x007B
24
#define CLOSEBRACE 0x007D
29
#define ASTERISK 0x002A
34
static int32_t lineCount;
37
static enum ETokenType getStringToken(UCHARBUF *buf,
39
struct UString *token,
42
static UChar32 getNextChar (UCHARBUF *buf, UBool skipwhite, UErrorCode *status);
43
static void seekUntilNewline (UCHARBUF *buf, UErrorCode *status);
44
static void seekUntilEndOfComment (UCHARBUF *buf, UErrorCode *status);
45
static UBool isWhitespace (UChar32 c);
46
static UBool isNewline (UChar32 c);
48
void resetLineNumber() {
52
/* Read and return the next token from the stream. If the token is of
53
type eString, fill in the token parameter with the token. If the
54
token is eError, then the status parameter will contain the
55
specific error. This will be eItemNotFound at the end of file,
56
indicating that all tokens have been returned. This method will
57
never return eString twice in a row; instead, multiple adjacent
58
string tokens will be merged into one, with no intervening
60
enum ETokenType getNextToken(UCHARBUF* buf,
61
struct UString *token,
62
uint32_t *linenumber, /* out: linenumber of token */
64
enum ETokenType result;
67
if (U_FAILURE(*status)) {
72
c = getNextChar(buf, TRUE, status);
74
if (U_FAILURE(*status)) {
78
*linenumber = lineCount;
84
return TOK_OPEN_BRACE;
86
return TOK_CLOSE_BRACE;
95
result = getStringToken(buf, c, token, status);
98
*linenumber = lineCount;
102
/* Copy a string token into the given UnicodeString. Upon entry, we
103
have already read the first character of the string token, which is
104
not a whitespace character (but may be a QUOTE or ESCAPE). This
105
function reads all subsequent characters that belong with this
106
string, and copies them into the token parameter. The other
107
important, and slightly convoluted, purpose of this function is to
108
merge adjacent strings. It looks forward a bit, and if the next
109
non-comment, non-whitespace item is a string, it reads it in as
110
well. If two adjacent strings are quoted, they are merged without
111
intervening space. Otherwise a single SPACE character is
113
static enum ETokenType getStringToken(UCHARBUF* buf,
115
struct UString *token,
116
UErrorCode *status) {
117
UBool lastStringWasQuoted;
119
UChar target[3] = { '\0' };
120
UChar *pTarget = target;
122
UBool isFollowingCharEscaped=FALSE;
123
/* We are guaranteed on entry that initialChar is not a whitespace
124
character. If we are at the EOF, or have some other problem, it
125
doesn't matter; we still want to validly return the initialChar
126
(if nothing else) as a string token. */
128
if (U_FAILURE(*status)) {
133
lastStringWasQuoted = FALSE;
135
ustr_setlen(token, 0, status);
137
if (U_FAILURE(*status)) {
143
if (!lastStringWasQuoted && token->fLength > 0) {
144
ustr_ucat(token, SPACE, status);
146
if (U_FAILURE(*status)) {
151
lastStringWasQuoted = TRUE;
154
c = ucbuf_getc(buf,status);
161
/* Unterminated quoted strings */
162
if (U_FAILURE(*status)) {
166
if (c == QUOTE && !isFollowingCharEscaped) {
172
c = unescape(buf, status);
179
if(c==ESCAPE && !isFollowingCharEscaped){
180
isFollowingCharEscaped = TRUE;
182
U_APPEND_CHAR32(c, pTarget,len);
184
ustr_uscat(token, pTarget,len, status);
185
isFollowingCharEscaped = FALSE;
189
if (U_FAILURE(*status)) {
194
if (token->fLength > 0) {
195
ustr_ucat(token, SPACE, status);
197
if (U_FAILURE(*status)) {
202
lastStringWasQuoted = FALSE;
206
c = unescape(buf, status);
214
U_APPEND_CHAR32(c, pTarget,len);
216
ustr_uscat(token, pTarget,len, status);
219
if (U_FAILURE(*status)) {
224
/* DON'T skip whitespace */
225
c = getNextChar(buf, FALSE, status);
229
ucbuf_ungetc(c, buf);
233
if (U_FAILURE(*status)) {
242
ucbuf_ungetc(c, buf);
246
if (isWhitespace(c)) {
252
c = unescape(buf, status);
259
U_APPEND_CHAR32(c, pTarget,len);
261
ustr_uscat(token, pTarget,len, status);
263
if (U_FAILURE(*status)) {
269
/* DO skip whitespace */
270
c = getNextChar(buf, TRUE, status);
272
if (U_FAILURE(*status)) {
276
if (c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
277
ucbuf_ungetc(c, buf);
283
/* Retrieve the next character, ignoring comments. If skipwhite is
284
true, whitespace is skipped as well. */
285
static UChar32 getNextChar(UCHARBUF* buf,
287
UErrorCode *status) {
290
if (U_FAILURE(*status)) {
295
c = ucbuf_getc(buf,status);
301
if (skipwhite && isWhitespace(c)) {
305
/* This also handles the get() failing case */
310
c = ucbuf_getc(buf,status);
318
seekUntilNewline(buf, status);
322
seekUntilEndOfComment(buf, status);
326
ucbuf_ungetc(c, buf);
327
/* If get() failed this is a NOP */
333
static void seekUntilNewline(UCHARBUF* buf,
334
UErrorCode *status) {
337
if (U_FAILURE(*status)) {
342
c = ucbuf_getc(buf,status);
343
} while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR);
346
static void seekUntilEndOfComment(UCHARBUF *buf,
347
UErrorCode *status) {
351
if (U_FAILURE(*status)) {
358
c = ucbuf_getc(buf, status);
361
d = ucbuf_getc(buf, status);
364
ucbuf_ungetc(d, buf);
369
} while (c != U_EOF && *status == U_ZERO_ERROR);
372
*status = U_INVALID_FORMAT_ERROR;
373
error(line, "unterminated comment detected");
377
UChar32 unescape(UCHARBUF *buf,
378
UErrorCode *status) {
379
if (U_FAILURE(*status)) {
383
/* We expect to be called after the ESCAPE has been seen, but
384
* ucbuf_getcx needs an ESCAPE to do its magic. */
385
ucbuf_ungetc(ESCAPE, buf);
387
return ucbuf_getcx(buf, status);
390
static UBool isWhitespace(UChar32 c) {
392
/* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
407
static UBool isNewline(UChar32 c) {
409
/* '\n', '\r', 0x2029 */