/* $Id: xml.c 2727 2009-06-01 09:28:28Z bennylp $ */
/*
 * Copyright (C) 2008-2009 Teluu Inc. (http://www.teluu.com)
 * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Additional permission under GNU GPL version 3 section 7:
 *
 * If you modify this program, or any covered work, by linking or
 * combining it with the OpenSSL project's OpenSSL library (or a
 * modified version of that library), containing parts covered by the
 * terms of the OpenSSL or SSLeay licenses, Teluu Inc. (http://www.teluu.com)
 * grants you additional permission to convey the resulting work.
 * Corresponding Source for a non-source form of such a combination
 * shall include the source code for the parts of OpenSSL used as well
 * as that of the covered work.
 */
#include <pjlib-util/xml.h>
#include <pjlib-util/scanner.h>
#include <pj/except.h>
#include <pj/log.h>
#include <pj/pool.h>
#include <pj/string.h>
/* Exception ID thrown by on_syntax_error() when the XML input is malformed. */
#define EX_SYNTAX_ERROR 12
/* Source-file tag passed as the sender argument of PJ_LOG() in this file. */
#define THIS_FILE "xml.c"
static void on_syntax_error(struct pj_scanner *scanner)
44
PJ_UNUSED_ARG(scanner);
45
PJ_THROW(EX_SYNTAX_ERROR);
48
/*
 * Allocate a zero-filled XML node from the pool and initialise its
 * attribute list and child-node list heads.
 *
 * NOTE(review): the local declaration, the return statement and the braces
 * were missing from the reviewed text and are restored here from context.
 */
static pj_xml_node *alloc_node( pj_pool_t *pool )
{
    pj_xml_node *node;

    node = PJ_POOL_ZALLOC_T(pool, pj_xml_node);
    pj_list_init( &node->attr_head );
    pj_list_init( &node->node_head );

    return node;
}
/*
 * Allocate a zero-filled XML attribute from the pool.
 *
 * NOTE(review): the enclosing braces were missing from the reviewed text and
 * are restored here.
 */
static pj_xml_attr *alloc_attr( pj_pool_t *pool )
{
    return PJ_POOL_ZALLOC_T(pool, pj_xml_attr);
}
/* This is a recursive function! */
65
static pj_xml_node *xml_parse_node( pj_pool_t *pool, pj_scanner *scanner)
72
if (*scanner->curptr != '<')
73
on_syntax_error(scanner);
75
/* Handle Processing Instructino (PI) construct (i.e. "<?") */
76
if (*scanner->curptr == '<' && *(scanner->curptr+1) == '?') {
77
pj_scan_advance_n(scanner, 2, PJ_FALSE);
80
pj_scan_get_until_ch(scanner, '?', &dummy);
81
if (*scanner->curptr=='?' && *(scanner->curptr+1)=='>') {
82
pj_scan_advance_n(scanner, 2, PJ_TRUE);
85
pj_scan_advance_n(scanner, 1, PJ_FALSE);
88
return xml_parse_node(pool, scanner);
91
/* Handle comments construct (i.e. "<!") */
92
if (pj_scan_strcmp(scanner, "<!", 2) == 0) {
93
pj_scan_advance_n(scanner, 2, PJ_FALSE);
96
pj_scan_get_until_ch(scanner, '>', &dummy);
97
if (pj_scan_strcmp(scanner, ">", 1) == 0) {
98
pj_scan_advance_n(scanner, 1, PJ_TRUE);
101
pj_scan_advance_n(scanner, 1, PJ_FALSE);
104
return xml_parse_node(pool, scanner);
108
node = alloc_node(pool);
111
pj_scan_get_char(scanner);
114
pj_scan_get_until_chr( scanner, " />\t", &node->name);
116
/* Get attributes. */
117
while (*scanner->curptr != '>' && *scanner->curptr != '/') {
118
pj_xml_attr *attr = alloc_attr(pool);
120
pj_scan_get_until_chr( scanner, "=> \t", &attr->name);
121
if (*scanner->curptr == '=') {
122
pj_scan_get_char( scanner );
123
pj_scan_get_quotes(scanner, "\"'", "\"'", 2, &attr->value);
124
/* remove quote characters */
126
attr->value.slen -= 2;
129
pj_list_push_back( &node->attr_head, attr );
132
if (*scanner->curptr == '/') {
133
pj_scan_get_char(scanner);
134
if (pj_scan_get_char(scanner) != '>')
135
on_syntax_error(scanner);
139
/* Enclosing bracket. */
140
if (pj_scan_get_char(scanner) != '>')
141
on_syntax_error(scanner);
144
while (*scanner->curptr == '<' && *(scanner->curptr+1) != '/') {
145
pj_xml_node *sub_node = xml_parse_node(pool, scanner);
146
pj_list_push_back( &node->node_head, sub_node );
150
if (!pj_scan_is_eof(scanner) && *scanner->curptr != '<') {
151
pj_scan_get_until_ch(scanner, '<', &node->content);
154
/* Enclosing node. */
155
if (pj_scan_get_char(scanner) != '<' || pj_scan_get_char(scanner) != '/')
156
on_syntax_error(scanner);
158
pj_scan_get_until_chr(scanner, " \t>", &end_name);
161
if (pj_stricmp(&node->name, &end_name) != 0)
162
on_syntax_error(scanner);
165
if (pj_scan_get_char(scanner) != '>')
166
on_syntax_error(scanner);
171
/*
 * Parse an XML document held in msg[0..len) and return its root node.
 *
 * Returns NULL when pool or msg is NULL, when len is zero, or when the
 * parser throws: "node" is assigned only if xml_parse_node() returns
 * normally.  A caught exception is logged at level 4 together with the
 * scanner's line and column.
 *
 * NOTE(review): the scanner declaration, the PJ_USE_EXCEPTION / PJ_TRY /
 * PJ_CATCH_ANY / PJ_END frame, the last pj_scan_init() argument, the return
 * statements and the braces were missing from the reviewed text and are
 * restored here from context; confirm against the upstream file.
 */
PJ_DEF(pj_xml_node*) pj_xml_parse( pj_pool_t *pool, char *msg, pj_size_t len)
{
    pj_xml_node *node = NULL;
    pj_scanner scanner;
    PJ_USE_EXCEPTION;

    if (!msg || !len || !pool)
        return NULL;

    pj_scan_init( &scanner, msg, len,
                  PJ_SCAN_AUTOSKIP_WS|PJ_SCAN_AUTOSKIP_NEWLINE,
                  &on_syntax_error);
    PJ_TRY {
        node = xml_parse_node(pool, &scanner);
    }
    PJ_CATCH_ANY {
        PJ_LOG(4,(THIS_FILE, "Syntax error parsing XML in line %d column %d",
                  scanner.line, pj_scan_get_col(&scanner)));
    }
    PJ_END;
    pj_scan_fini( &scanner );
    return node;
}
/* This is a recursive function. */
196
static int xml_print_node( const pj_xml_node *node, int indent,
197
char *buf, pj_size_t len )
202
pj_xml_node *sub_node;
204
#define SIZE_LEFT() ((int)(len - (p-buf)))
209
if (SIZE_LEFT() < node->name.slen + indent + 5)
211
for (i=0; i<indent; ++i)
214
pj_memcpy(p, node->name.ptr, node->name.slen);
215
p += node->name.slen;
217
/* Print attributes. */
218
attr = node->attr_head.next;
219
while (attr != &node->attr_head) {
221
if (SIZE_LEFT() < attr->name.slen + attr->value.slen + 4)
226
/* Attribute name. */
227
pj_memcpy(p, attr->name.ptr, attr->name.slen);
228
p += attr->name.slen;
230
/* Attribute value. */
231
if (attr->value.slen) {
234
pj_memcpy(p, attr->value.ptr, attr->value.slen);
235
p += attr->value.slen;
242
/* Check for empty node. */
243
if (node->content.slen==0 &&
244
node->node_head.next==(pj_xml_node*)&node->node_head)
253
if (SIZE_LEFT() < 1) return -1;
256
/* Print sub nodes. */
257
sub_node = node->node_head.next;
258
while (sub_node != (pj_xml_node*)&node->node_head) {
261
if (SIZE_LEFT() < indent + 3)
266
printed = xml_print_node(sub_node, indent + 1, p, SIZE_LEFT());
271
sub_node = sub_node->next;
275
if (node->content.slen) {
276
if (SIZE_LEFT() < node->content.slen) return -1;
277
pj_memcpy(p, node->content.ptr, node->content.slen);
278
p += node->content.slen;
281
/* Enclosing node. */
282
if (node->node_head.next != (pj_xml_node*)&node->node_head) {
283
if (SIZE_LEFT() < node->name.slen + 5 + indent)
287
for (i=0; i<indent; ++i)
290
if (SIZE_LEFT() < node->name.slen + 3)
295
pj_memcpy(p, node->name.ptr, node->name.slen);
296
p += node->name.slen;
304
PJ_DEF(int) pj_xml_print(const pj_xml_node *node, char *buf, pj_size_t len,
305
pj_bool_t include_prolog)
310
if (!node || !buf || !len)
313
if (include_prolog) {
314
pj_str_t prolog = {"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 39};
315
if ((int)len < prolog.slen)
317
pj_memcpy(buf, prolog.ptr, prolog.slen);
318
prolog_len = prolog.slen;
321
printed = xml_print_node(node, 0, buf+prolog_len, len-prolog_len) + prolog_len;
322
if (printed > 0 && len-printed >= 1) {
323
buf[printed++] = '\n';
328
/*
 * Create a new, empty XML node whose name is a pool-allocated copy of
 * "name".
 *
 * NOTE(review): the return statement and the braces were missing from the
 * reviewed text and are restored here from context.
 */
PJ_DEF(pj_xml_node*) pj_xml_node_new(pj_pool_t *pool, const pj_str_t *name)
{
    pj_xml_node *node = alloc_node(pool);

    pj_strdup(pool, &node->name, name);
    return node;
}
/*
 * Create a new XML attribute whose name and value are pool-allocated
 * copies of the given strings.
 *
 * NOTE(review): the return statement and the braces were missing from the
 * reviewed text and are restored here from context.
 */
PJ_DEF(pj_xml_attr*) pj_xml_attr_new( pj_pool_t *pool, const pj_str_t *name,
                                      const pj_str_t *value)
{
    pj_xml_attr *attr = alloc_attr(pool);

    pj_strdup( pool, &attr->name, name);
    pj_strdup( pool, &attr->value, value);
    return attr;
}
PJ_DEF(void) pj_xml_add_node( pj_xml_node *parent, pj_xml_node *node )
346
pj_list_push_back(&parent->node_head, node);
349
PJ_DEF(void) pj_xml_add_attr( pj_xml_node *node, pj_xml_attr *attr )
351
pj_list_push_back(&node->attr_head, attr);
354
/*
 * Return the first direct child of "parent" whose name equals "name"
 * (compared ignoring case), or NULL when there is none.
 *
 * NOTE(review): the loop advance, the final "return NULL" and the braces
 * were missing from the reviewed text and are restored here from context.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_node(const pj_xml_node *parent,
                                      const pj_str_t *name)
{
    const pj_xml_node *node = parent->node_head.next;

    /* The list head acts as the end-of-list sentinel. */
    while (node != (void*)&parent->node_head) {
        if (pj_stricmp(&node->name, name) == 0)
            return (pj_xml_node*)node;
        node = node->next;
    }
    return NULL;
}
/*
 * Depth-first search below "parent" for a node whose name equals "name"
 * (compared ignoring case).  Each child is tested before its own
 * descendants are searched.  Returns the first match, or NULL.
 *
 * NOTE(review): the "found" declaration and its test, the loop advance,
 * the final "return NULL" and the braces were missing from the reviewed
 * text and are restored here from context.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_node_rec(const pj_xml_node *parent,
                                          const pj_str_t *name)
{
    const pj_xml_node *node = parent->node_head.next;

    while (node != (void*)&parent->node_head) {
        pj_xml_node *found;

        if (pj_stricmp(&node->name, name) == 0)
            return (pj_xml_node*)node;
        found = pj_xml_find_node_rec(node, name);
        if (found)
            return (pj_xml_node*)found;
        node = node->next;
    }
    return NULL;
}
/*
 * Continue a search among parent's children: return the first sibling
 * after "node" whose name equals "name" (compared ignoring case), or NULL
 * when the end of parent's child list is reached.
 *
 * NOTE(review): the initial step past "node", the loop advance, the final
 * "return NULL" and the braces were missing from the reviewed text and are
 * restored here from context.  The initial step is inferred from the
 * function name; confirm against the upstream file.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_next_node( const pj_xml_node *parent,
                                            const pj_xml_node *node,
                                            const pj_str_t *name)
{
    node = node->next;
    while (node != (void*)&parent->node_head) {
        if (pj_stricmp(&node->name, name) == 0)
            return (pj_xml_node*)node;
        node = node->next;
    }
    return NULL;
}
/*
 * Find an attribute of "node" by name (compared ignoring case).  When
 * "value" is non-NULL the attribute's value must match it as well (also
 * ignoring case).  Returns the first match, or NULL.
 *
 * NOTE(review): the "if (value) ... else" branch, the loop advance, the
 * final "return NULL" and the braces were missing from the reviewed text.
 * They are restored here from the two consecutive return statements that
 * remained visible; confirm against the upstream file.
 */
PJ_DEF(pj_xml_attr*) pj_xml_find_attr( const pj_xml_node *node,
                                       const pj_str_t *name,
                                       const pj_str_t *value)
{
    const pj_xml_attr *attr = node->attr_head.next;

    while (attr != (void*)&node->attr_head) {
        if (pj_stricmp(&attr->name, name)==0) {
            if (value) {
                if (pj_stricmp(&attr->value, value)==0)
                    return (pj_xml_attr*)attr;
            } else {
                return (pj_xml_attr*)attr;
            }
        }
        attr = attr->next;
    }
    return NULL;
}
/*
 * Search the direct children of "parent" for a node that satisfies the
 * given criteria:
 *  - when "name" is non-NULL, the node name must equal it (ignoring case);
 *  - when "match" is non-NULL, match(node, data) must return non-zero.
 * Returns the first node satisfying all supplied criteria, or NULL.  NULL
 * is also returned when neither "name" nor "match" is supplied.
 *
 * NOTE(review): the "data" parameter line, the tail of the callback
 * prototype, the name/match NULL checks, the loop advance, the return
 * statements and the braces were missing from the reviewed text.  They are
 * restored here from the visible calls (match(node, data) and the argument
 * order used by pj_xml_find_rec); confirm against the upstream file.
 */
PJ_DEF(pj_xml_node*) pj_xml_find( const pj_xml_node *parent,
                                  const pj_str_t *name,
                                  const void *data,
                                  pj_bool_t (*match)(const pj_xml_node *,
                                                     const void*))
{
    const pj_xml_node *node = (const pj_xml_node *)parent->node_head.next;

    if (!name && !match)
        return NULL;

    while (node != (const pj_xml_node*) &parent->node_head) {
        if (name) {
            if (pj_stricmp(&node->name, name)!=0) {
                /* Name differs: skip this child. */
                node = node->next;
                continue;
            }
        }
        if (match) {
            if (match(node, data))
                return (pj_xml_node*)node;
        } else {
            return (pj_xml_node*)node;
        }

        node = node->next;
    }
    return NULL;
}
/*
 * Depth-first variant of pj_xml_find(): search all descendants of
 * "parent" for a node that satisfies the given criteria:
 *  - when "name" is non-NULL, the node name must equal it (ignoring case);
 *  - when "match" is non-NULL, match(node, data) must return non-zero.
 * Each child is tested before its own descendants are searched.  Returns
 * the first node satisfying all supplied criteria, or NULL.  NULL is also
 * returned when neither "name" nor "match" is supplied.
 *
 * NOTE(review): the "data" parameter line, the tail of the callback
 * prototype, the name/match NULL checks, the "found" declaration and its
 * test, the loop advance, the return statements and the braces were missing
 * from the reviewed text.  They are restored here from the visible calls;
 * confirm against the upstream file.
 */
PJ_DEF(pj_xml_node*) pj_xml_find_rec( const pj_xml_node *parent,
                                      const pj_str_t *name,
                                      const void *data,
                                      pj_bool_t (*match)(const pj_xml_node*,
                                                         const void*))
{
    const pj_xml_node *node = (const pj_xml_node *)parent->node_head.next;

    if (!name && !match)
        return NULL;

    while (node != (const pj_xml_node*) &parent->node_head) {
        pj_xml_node *found;

        if (name) {
            if (pj_stricmp(&node->name, name)==0) {
                if (match) {
                    if (match(node, data))
                        return (pj_xml_node*)node;
                } else {
                    return (pj_xml_node*)node;
                }
            }

        } else if (match) {
            if (match(node, data))
                return (pj_xml_node*)node;
        }

        /* No hit on this child: look through its descendants. */
        found = pj_xml_find_rec(node, name, data, match);
        if (found)
            return found;

        node = node->next;
    }
    return NULL;
}
/*
 * Deep-copy the node "rhs" into memory taken from "pool": the name and
 * content strings, every attribute (name and value), and, recursively,
 * every child node.  Attribute and child order is preserved.  Returns the
 * new node.
 *
 * NOTE(review): the reviewed text ended inside the child-cloning loop, and
 * the local declarations and braces were also missing.  The loop advance,
 * the closing braces and the final "return node" are restored here on the
 * assumption that nothing else follows; confirm against the upstream file.
 */
PJ_DEF(pj_xml_node*) pj_xml_clone( pj_pool_t *pool, const pj_xml_node *rhs)
{
    pj_xml_node *node;
    const pj_xml_attr *r_attr;
    const pj_xml_node *child;

    node = alloc_node(pool);

    pj_strdup(pool, &node->name, &rhs->name);
    pj_strdup(pool, &node->content, &rhs->content);

    /* Clone all attributes */
    r_attr = rhs->attr_head.next;
    while (r_attr != &rhs->attr_head) {
        pj_xml_attr *attr;

        attr = alloc_attr(pool);
        pj_strdup(pool, &attr->name, &r_attr->name);
        pj_strdup(pool, &attr->value, &r_attr->value);

        pj_list_push_back(&node->attr_head, attr);

        r_attr = r_attr->next;
    }

    /* Clone all child nodes. */
    child = rhs->node_head.next;
    while (child != (pj_xml_node*) &rhs->node_head) {
        pj_xml_node *new_child;

        new_child = pj_xml_clone(pool, child);
        pj_list_push_back(&node->node_head, new_child);

        child = child->next;
    }

    return node;
}