2
* Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
3
* Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.
5
* Redistribution and modifications are permitted subject to BSD license.
7
#include <asn_system.h>
8
#include <xer_support.h>
17
ST_TAG_UNQUOTED_STRING,
18
ST_COMMENT_WAIT_DASH1, /* "<!--"[1] */
19
ST_COMMENT_WAIT_DASH2, /* "<!--"[2] */
21
ST_COMMENT_CLO_DASH2, /* "-->"[0] */
22
ST_COMMENT_CLO_RT /* "-->"[1] */
25
static pxml_chunk_type_e final_chunk_type[] = {
36
0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0,
37
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
38
1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
39
2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0, /* 01234567 89 */
40
0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* ABCDEFG HIJKLMNO */
41
3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0, /* PQRSTUVW XYZ */
42
0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* abcdefg hijklmno */
43
3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0 /* pqrstuvw xyz */
45
/*
 * Evaluates to nonzero when the byte `ch` has class code 1 in the
 * _charclass lookup table. The (unsigned char) cast keeps the table
 * index non-negative even where plain char is signed.
 */
#define WHITESPACE(ch)	(_charclass[(unsigned char)(ch)] == 1)
46
/*
 * Evaluates to nonzero when the byte `ch` has a class code of 2 or
 * higher in the _charclass lookup table (the ALPHA() test matches
 * code 3, so this accepts that class as well as code 2).
 * The (unsigned char) cast keeps the table index non-negative.
 */
#define ALNUM(ch)	(_charclass[(unsigned char)(ch)] >= 2)
47
/*
 * Evaluates to nonzero when the byte `ch` has class code 3 in the
 * _charclass lookup table. The (unsigned char) cast keeps the table
 * index non-negative even where plain char is signed.
 */
#define ALPHA(ch)	(_charclass[(unsigned char)(ch)] == 3)
49
/* Aliases for characters, ASCII/UTF-8 */
50
#define EXCLAM 0x21 /* '!' */
51
#define CQUOTE 0x22 /* '"' */
52
#define CDASH 0x2d /* '-' */
53
#define CSLASH 0x2f /* '/' */
54
#define LANGLE 0x3c /* '<' */
55
#define CEQUAL 0x3d /* '=' */
56
#define RANGLE 0x3e /* '>' */
57
#define CQUEST 0x3f /* '?' */
59
/* Invoke token callback */
60
#define TOKEN_CB_CALL(type, _ns, _current_too, _final) do { \
63
ssize_t _sz = (p - chunk_start) + _current_too; \
69
_ret = cb(type, chunk_start, _sz, key); \
71
if(_current_too && _ret == -1) \
75
chunk_start = p + _current_too; \
79
/*
 * Non-final token callback: forwards to TOKEN_CB_CALL with the chunk type
 * passed through unchanged and the _final argument set to 0.
 *
 *   _type        - chunk type handed to the callback (e.g. PXML_TEXT).
 *   _ns          - a parser state constant (e.g. ST_TAG_START); presumably
 *                  the state to switch to afterwards -- confirm against the
 *                  full TOKEN_CB_CALL definition.
 *   _current_too - 1 to include the current byte (*p) in the reported chunk,
 *                  0 to end the chunk just before it; the same value is
 *                  added to p when chunk_start is advanced.
 *
 * Expands in terms of the enclosing function's locals (p, chunk_start,
 * cb, key), so it is only usable where those are in scope.
 */
#define TOKEN_CB(_type, _ns, _current_too) \
80
TOKEN_CB_CALL(_type, _ns, _current_too, 0)
82
/*
 * Final-chunk token callback: like TOKEN_CB, except that the chunk type is
 * first translated through the final_chunk_type[] table (indexed by _type)
 * and the _final argument of TOKEN_CB_CALL is set to 1.
 *
 *   _type        - chunk type used as the index into final_chunk_type[]
 *                  (e.g. PXML_TAG, PXML_COMMENT).
 *   _ns          - a parser state constant (e.g. ST_TEXT); presumably the
 *                  state to switch to afterwards -- confirm against the
 *                  full TOKEN_CB_CALL definition.
 *   _current_too - 1 to include the current byte (*p) in the reported chunk,
 *                  0 to end the chunk just before it.
 *
 * Expands in terms of the enclosing function's locals (p, chunk_start,
 * cb, key), so it is only usable where those are in scope.
 */
#define TOKEN_CB_FINAL(_type, _ns, _current_too) \
83
TOKEN_CB_CALL(final_chunk_type[_type], _ns, _current_too, 1)
88
ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {
89
pstate_e state = (pstate_e)*stateContext;
90
const char *chunk_start = (const char *)xmlbuf;
91
const char *p = chunk_start;
92
const char *end = p + size;
95
int C = *(const unsigned char *)p;
99
* Initial state: we're in the middle of some text,
100
* or just have started.
103
/* We're now in the tag, probably */
104
TOKEN_CB(PXML_TEXT, ST_TAG_START, 0);
107
if (ALPHA(C) || (C == CSLASH))
109
else if (C == EXCLAM)
110
state = ST_COMMENT_WAIT_DASH1;
113
* Not characters and not whitespace.
114
* Must be something like "3 < 4".
116
TOKEN_CB(PXML_TEXT, ST_TEXT, 1);/* Flush as data */
122
TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
126
* The previous tag wasn't completed, but still
127
* recognized as valid. (Mozilla-compatible)
129
TOKEN_CB_FINAL(PXML_TAG, ST_TAG_START, 0);
132
state = ST_TAG_QUOTE_WAIT;
136
case ST_TAG_QUOTE_WAIT:
138
* State after the equal sign ("=") in the tag.
142
state = ST_TAG_QUOTED_STRING;
146
TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
150
/* Unquoted string value */
151
state = ST_TAG_UNQUOTED_STRING;
154
case ST_TAG_QUOTED_STRING:
156
* Tag attribute's string value in quotes.
159
/* Return back to the tag state */
163
case ST_TAG_UNQUOTED_STRING:
166
TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
167
} else if(WHITESPACE(C)) {
168
/* Return back to the tag state */
172
case ST_COMMENT_WAIT_DASH1:
174
state = ST_COMMENT_WAIT_DASH2;
176
/* Some ordinary tag. */
180
case ST_COMMENT_WAIT_DASH2:
185
/* Some ordinary tag */
191
state = ST_COMMENT_CLO_DASH2;
194
case ST_COMMENT_CLO_DASH2:
196
state = ST_COMMENT_CLO_RT;
198
/* This is not an end of a comment */
202
case ST_COMMENT_CLO_RT:
204
TOKEN_CB_FINAL(PXML_COMMENT, ST_TEXT, 1);
205
} else if(C == CDASH) {
206
/* Maintain current state, still waiting for '>' */
215
* Flush the partially processed chunk, state permitting.
217
if(p - chunk_start) {
220
TOKEN_CB(PXML_COMMENT, state, 0);
223
TOKEN_CB(PXML_TEXT, state, 0);
225
default: break; /* a no-op */
230
*stateContext = (int)state;
231
return chunk_start - (const char *)xmlbuf;