5
// Fills memory starting at s with the value c.
// NOTE(review): this shares its name with the standard library memset but has
// a different signature (void return, char * destination, int length) —
// confirm <string.h> is never pulled into the same translation unit.
void memset(char *s, int c, int n) {
7
// Store c through s and advance until s reaches se. se is set up on a line
// not shown here (presumably s + n — TODO confirm).
while(s < se) *s++ = c;
13
// Compares the bytes at a and b. n is presumably the number of bytes to
// compare (the loop itself is on lines not shown — TODO confirm).
int dh_memcmp(char *a,char *b,int n) {
16
// On a differing byte return c+1. c is declared on a line not shown
// (presumably the current index, which would make the result nonzero on any
// mismatch — the caller in parserc_parse treats nonzero as "not equal").
if( *a != *b ) return c+1;
22
// Allocates a zero-filled struct nodec and sets its parent to newparent.
struct nodec *new_nodecp( struct nodec *newparent ) {
24
int size = sizeof( struct nodec );
25
struct nodec *self = (struct nodec *) malloc( size );
26
// NOTE(review): the malloc result is handed to memset without a NULL check.
memset( (char *) self, 0, size );
27
self->parent = newparent;
32
// Allocates a zero-filled struct nodec; since every byte is cleared, no
// parent is set here (compare new_nodecp, which assigns one).
struct nodec *new_nodec() {
33
int size = sizeof( struct nodec );
34
struct nodec *self = (struct nodec *) malloc( size );
35
// NOTE(review): the malloc result is handed to memset without a NULL check.
memset( (char *) self, 0, size );
39
// Tears down a node. Only the starting points of two traversals are visible
// here: the node's first child and its first attribute. The loops and the
// actual frees are on lines not shown (presumably each child and attribute is
// released in turn — TODO confirm).
void del_nodec( struct nodec *node ) {
40
struct nodec *curnode;
44
// Begin at the head of the child list.
curnode = node->firstchild;
51
// Begin at the head of the attribute list (curatt is declared on a line not
// shown).
curatt = node->firstatt;
60
// Allocates a zero-filled struct attc and sets its parent to newparent.
struct attc* new_attc( struct nodec *newparent ) {
61
int size = sizeof( struct attc );
62
struct attc *self = (struct attc *) malloc( size );
63
// NOTE(review): the malloc result is handed to memset without a NULL check.
memset( (char *) self, 0, size );
64
self->parent = newparent;
72
// Numeric ids for parser states. A parser stores one of these in last_state
// when it runs out of input, and parserc_parse's resume switch maps the id
// back to the matching state label. Other ST_* ids used by the parsers are
// defined on lines not shown here.
#define ST_comment_1dash 3
73
#define ST_comment_2dash 4
75
#define ST_comment_x 6
81
#define ST_name_gap 11
82
#define ST_att_name1 12
83
#define ST_att_space 13
84
#define ST_att_name 14
85
#define ST_att_nameqs 15
86
#define ST_att_nameqsdone 16
89
#define ST_att_quot 19
90
#define ST_att_quots 20
91
#define ST_att_tick 21
95
// Parses the XML text at xmlin into a tree of nodec/attc records rooted at
// self->rootnode. Values and attribute names are recorded as pointers into
// xmlin (see the "= cpos" assignments), not copied.
//
// The parse is resumable. When a NUL byte is reached, the id of the current
// state is put in last_state and control jumps to done, where the working
// variables are saved into self. A later call with self->last_state nonzero
// restores those variables and jumps straight to the matching state label.
//
// The value returned is produced on lines not shown here. The printf tracing
// calls are presumably wrapped in a debug conditional on lines not shown —
// TODO confirm.
int parserc_parse( struct parserc *self, char *xmlin ) {
96
// Variables that represent current 'state'
97
struct nodec *root = NULL;
98
char *tagname = NULL; int tagname_len = 0;
99
char *attname = NULL; int attname_len = 0;
100
char *attval = NULL; int attval_len = 0;
102
struct nodec *curnode = NULL;
103
struct attc *curatt = NULL;
105
self->rootpos = xmlin;
107
// Variables used temporarily during processing
109
char *cpos = &xmlin[0];
114
// Resume path: a nonzero last_state means a previous call stopped mid-parse.
if( self->last_state ) {
116
printf( "Resuming parse in state %i\n", self->last_state );
119
// Reload the working variables saved at the done label of the earlier call.
root = self->rootnode;
120
curnode = self->curnode;
121
curatt = self->curatt;
122
tagname = self->tagname; tagname_len = self->tagname_len;
123
attname = self->attname; attname_len = self->attname_len;
124
attval = self->attval; attval_len = self->attval_len;
125
att_has_val = self->att_has_val;
126
// Jump back into the state machine at the state that was interrupted.
switch( self->last_state ) {
127
case ST_val_1: goto val_1;
128
case ST_val_x: goto val_x;
129
case ST_comment_1dash: goto comment_1dash;
130
case ST_comment_2dash: goto comment_2dash;
131
case ST_comment: goto comment;
132
case ST_comment_x: goto comment_x;
134
case ST_bang: goto bang;
135
case ST_cdata: goto cdata;
136
case ST_name_1: goto name_1;
137
case ST_name_x: goto name_x;
138
case ST_name_gap: goto name_gap;
139
case ST_att_name1: goto att_name1;
140
case ST_att_space: goto att_space;
141
case ST_att_name: goto att_name;
142
case ST_att_nameqs: goto att_nameqs;
143
case ST_att_nameqsdone: goto att_nameqsdone;
144
case ST_att_eq1: goto att_eq1;
145
case ST_att_eqx: goto att_eqx;
146
case ST_att_quot: goto att_quot;
147
case ST_att_quots: goto att_quots;
148
case ST_att_tick: goto att_tick;
149
case ST_ename_1: goto ename_1;
150
case ST_ename_x: goto ename_x;
155
// Fresh parse: create the root node and make it the current node.
curnode = root = self->rootnode = new_nodec();
159
printf("Entry to C Parser\n");
164
printf("val_1: %c\n", *cpos);
168
// NUL means the input ran out: remember the state and save everything at done.
case 0: last_state = ST_val_1; goto done;
169
case '<': goto val_x;
171
// First value seen for this node: record where it starts inside xmlin.
if( !curnode->numvals ) {
172
curnode->value = cpos;
180
printf("val_x: %c\n", *cpos);
184
case 0: last_state = ST_val_x; goto done;
186
// Peek at the character after the current one to classify the construct.
switch( *(cpos+1) ) {
188
if( *(cpos+2) == '[' ) { // <![
189
//if( !strncmp( cpos+3, "CDATA", 5 ) ) {
190
if( *(cpos+3) == 'C' &&
201
goto val_x;//actually goto error...
204
else if( *(cpos+2) == '-' && // <!--
217
tagname_len = 0; // for safety
221
// Grow the recorded value length only while the node holds exactly one value.
if( curnode->numvals == 1 ) curnode->vallen++;
228
if( let == '-' ) goto comment_2dash;
229
if( !let ) { last_state = ST_comment_1dash; goto done; }
239
if( !let ) { last_state = ST_comment_2dash; goto done; }
245
case 0: last_state = ST_comment; goto done;
246
case '-': goto comment_1dash;
248
// First comment seen for this node: record where it starts inside xmlin.
if( !curnode->numcoms ) {
249
curnode->comment = cpos;
258
case 0: last_state = ST_comment_x; goto done;
259
case '-': goto comment_1dash;
261
if( curnode->numcoms == 1 ) curnode->comlen++;
267
if( let == '?' && *(cpos+1) == '>' ) {
271
if( !let ) { last_state = ST_pi; goto done; }
281
if( !let ) { last_state = ST_bang; goto done; }
287
if( !let ) { last_state = ST_cdata; goto done; }
288
// "]]>" ends the CDATA section.
if( let == ']' && *(cpos+1) == ']' && *(cpos+2) == '>' ) {
292
if( !curnode->numvals ) {
293
curnode->value = cpos;
295
curnode->numvals = 1;
297
if( curnode->numvals == 1 ) curnode->vallen++;
303
printf("name_1: %c\n", *cpos);
307
case 0: last_state = ST_name_1; goto done;
313
case '/': // regular closing tag
314
tagname_len = 0; // needed to reset
325
printf("name_x: %c\n", *cpos);
329
case 0: last_state = ST_name_x; goto done;
333
// The tag name is complete: add a child and descend into it.
curnode = nodec_addchildr( curnode, tagname, tagname_len );
338
curnode = nodec_addchildr( curnode, tagname, tagname_len );
341
case '/': // self closing
342
// Self-closed tag: add the child but stay on the current node.
temp = nodec_addchildr( curnode, tagname, tagname_len );
343
// z records an offset relative to xmlin, here one past the current character.
temp->z = cpos +1 - xmlin;
356
case 0: last_state = ST_name_gap; goto done;
365
case '/': // self closing
366
curnode->z = cpos+1-xmlin;
367
curnode = curnode->parent;
368
// A NULL parent means there is no enclosing node left to return to.
if( !curnode ) goto done;
369
cpos+=2; // am assuming next char is >
373
goto name_gap;//actually goto error
378
printf("attname1: %c\n", *cpos);
383
case 0: last_state = ST_att_name1; goto done;
398
case 0: last_state = ST_att_space; goto done;
409
// we have another attribute name, so continue
413
printf("attname: %c\n", *cpos);
417
case 0: last_state = ST_att_name; goto done;
418
case '/': // self closing !! /> is assumed !!
419
curatt = nodec_addattr( curnode, attname, attname_len );
420
// NOTE(review): value receives the integer -1 here, while elsewhere it is
// assigned the char * attval; -1 looks like a "no value" sentinel — confirm
// the field type and how readers test for it.
if( !att_has_val ) { curatt->value = -1; curatt->vallen = 0; }
423
curnode->z = cpos+1-xmlin;
424
curnode = curnode->parent;
425
if( !curnode ) goto done;
429
if( *(cpos+1) == '=' ) {
433
curatt = nodec_addattr( curnode, attname, attname_len );
438
curatt = nodec_addattr( curnode, attname, attname_len );
439
if( !att_has_val ) { curatt->value = -1; curatt->vallen = 0; }
445
curatt = nodec_addattr( curnode, attname, attname_len );
451
// A zero length means this is the first character of the name: mark its start.
if( !attname_len ) attname = cpos;
458
printf("nameqs: %c\n", *cpos);
462
case 0: last_state = ST_att_nameqs; goto done;
473
printf("nameqsdone: %c\n", *cpos);
477
case 0: last_state = ST_att_nameqsdone; goto done;
480
curatt = nodec_addattr( curnode, attname, attname_len );
490
case 0: last_state = ST_att_eq1; goto done;
491
case '/': // self closing
492
if( *(cpos+1) == '>' ) {
493
curnode->z = cpos+1-xmlin;
494
curnode = curnode->parent;
495
if( !curnode ) goto done;
500
// Three quoting styles are recognised for attribute values: ", ' and `.
case '"': cpos++; goto att_quot;
501
case 0x27: cpos++; goto att_quots; //'
502
case '`': cpos++; goto att_tick;
503
case '>': cpos++; goto val_1;
504
case ' ': cpos++; goto att_eq1;
506
// A zero length means this is the first character of the value: mark its start.
if( !attval_len ) attval = cpos;
514
case 0: last_state = ST_att_eqx; goto done;
515
case '/': // self closing
516
if( *(cpos+1) == '>' ) {
517
curnode->z = cpos+1-xmlin;
518
curnode = curnode->parent;
519
if( !curnode ) goto done; // bad error condition
520
// Commit the accumulated value span to the current attribute.
curatt->value = attval;
521
curatt->vallen = attval_len;
528
curatt->value = attval;
529
curatt->vallen = attval_len;
534
curatt->value = attval;
535
curatt->vallen = attval_len;
541
if( !attval_len ) attval = cpos;
551
curatt->value = attval;
552
curatt->vallen = attval_len;
558
if( !let ) { last_state = ST_att_quot; goto done; }
559
if( !attval_len ) attval = cpos;
567
if( let == 0x27 ) { // '
569
curatt->value = attval;
570
curatt->vallen = attval_len;
576
if( !let ) { last_state = ST_att_quots; goto done; }
578
if( !attval_len ) attval = cpos;
588
curatt->value = attval;
589
curatt->vallen = attval_len;
595
if( !let ) { last_state = ST_att_tick; goto done; }
597
if( !attval_len ) attval = cpos;
605
curnode->namelen = tagname_len;
606
curnode->z = cpos-xmlin;
607
curnode = curnode->parent; // jump up
608
if( !curnode ) goto done;
614
if( !let ) { last_state = ST_ename_1; goto done; }
620
ename_x: // ending name
623
// Check the closing tag against the open node: lengths first, then bytes.
if( curnode->namelen != tagname_len ) {
626
// The assignment inside the condition is intentional: res keeps the
// dh_memcmp result, and a nonzero result means the names differ.
if( res = dh_memcmp( curnode->name, tagname, tagname_len ) ) {
628
printf("Closing node not equal: curnode->name=%.*s - opening tag=%.*s\n", tagname_len, curnode->name, tagname_len, tagname );
634
curnode->z = cpos-xmlin;
635
curnode = curnode->parent; // jump up
636
if( !curnode ) goto done;
642
if( !let ) { last_state = ST_ename_x; goto done; }
647
// Record the error as the negated offset of cpos from the start of xmlin.
self->err = - ( int ) ( cpos - &xmlin[0] );
651
// NOTE(review): the format string has no conversion, so the extra *cpos
// argument is evaluated but ignored.
printf("done\n", *cpos);
654
// store the current state of the parser
655
self->last_state = last_state;
656
self->curnode = curnode;
657
self->curatt = curatt;
658
self->tagname = tagname; self->tagname_len = tagname_len;
659
self->attname = attname; self->attname_len = attname_len;
660
self->attval = attval; self->attval_len = attval_len;
661
self->att_has_val = att_has_val;
664
// NOTE(review): as with the "done" trace, the *cpos argument is unused.
printf("returning\n", *cpos);
669
// Variant of parserc_parse whose state labels carry a u_ prefix. It does not
// support resuming: if self->last_state is nonzero it returns -1 at once.
//
// NOTE(review): among the lines shown here, a NUL check appears only in the
// val_1 and val_x states; the other visible states have no "case 0" or "!let"
// test. Presumably this variant relies on the input being well formed and
// fully present — confirm against the complete function.
//
// The value returned on the normal path is produced on lines not shown. The
// printf tracing calls are presumably wrapped in a debug conditional on lines
// not shown — TODO confirm.
int parserc_parse_unsafely( struct parserc *self, char *xmlin ) {
670
// Variables that represent current 'state'
671
struct nodec *root = NULL;
672
char *tagname = NULL; int tagname_len = 0;
673
char *attname = NULL; int attname_len = 0;
674
char *attval = NULL; int attval_len = 0;
676
struct nodec *curnode = NULL;
677
struct attc *curatt = NULL;
679
self->rootpos = xmlin;
681
// Variables used temporarily during processing
683
char *cpos = &xmlin[0];
688
if( self->last_state ) {
689
return -1; // unsafe doesn't support this
693
// Create the root node and make it the current node.
curnode = root = self->rootnode = new_nodec();
697
printf("Entry to C Parser\n");
702
printf("val_1: %c\n", *cpos);
705
case 0: last_state = ST_val_1; goto u_done;
706
case '<': goto u_val_x;
708
// First value seen for this node: record where it starts inside xmlin.
if( !curnode->numvals ) {
709
curnode->value = cpos;
717
printf("val_x: %c\n", *cpos);
720
case 0: last_state = ST_val_x; goto u_done;
722
if( *(cpos+1) == '!' &&
734
tagname_len = 0; // for safety
738
// Grow the recorded value length only while the node holds exactly one value.
if( curnode->numvals == 1 ) curnode->vallen++;
743
// "]]>" ends the CDATA section.
if( *cpos == ']' && *(cpos+1) == ']' && *(cpos+2) == '>' ) {
747
if( !curnode->numvals ) {
748
curnode->value = cpos;
750
curnode->numvals = 1;
752
if( curnode->numvals == 1 ) curnode->vallen++;
756
u_name_1: // node name
758
printf("name_1: %c\n", *cpos);
761
case '/': // regular closing tag
762
tagname_len = 0; // needed to reset
771
u_name_x: // node name
773
printf("name_x: %c\n", *cpos);
777
// The tag name is complete: add a child and descend into it.
curnode = nodec_addchildr( curnode, tagname, tagname_len );
782
curnode = nodec_addchildr( curnode, tagname, tagname_len );
785
case '/': // self closing
786
// Self-closed tag: add the child but stay on the current node.
temp = nodec_addchildr( curnode, tagname, tagname_len );
796
u_name_gap: // node name gap
802
case '/': // self closing
803
// NOTE(review): unlike the same state in parserc_parse, curnode->z is not
// recorded before moving up to the parent — confirm this is intended.
curnode = curnode->parent;
804
// A NULL parent means there is no enclosing node left to return to.
if( !curnode ) goto u_done;
805
cpos += 2; // am assuming next char is >
811
printf("attname1: %c\n", *cpos);
825
// we have another attribute name, so continue
829
printf("attname: %c\n", *cpos);
833
case '/': // self closing !! /> is assumed !!
834
curatt = nodec_addattr( curnode, attname, attname_len );
835
// NOTE(review): value receives the integer -1 here, while elsewhere it is
// assigned the char * attval; -1 looks like a "no value" sentinel — confirm
// the field type and how readers test for it.
if( !att_has_val ) { curatt->value = -1; curatt->vallen = 0; }
838
curnode = curnode->parent;
839
if( !curnode ) goto u_done;
843
if( *(cpos+1) == '=' ) {
847
curatt = nodec_addattr( curnode, attname, attname_len );
852
curatt = nodec_addattr( curnode, attname, attname_len );
853
if( !att_has_val ) { curatt->value = -1; curatt->vallen = 0; }
859
curatt = nodec_addattr( curnode, attname, attname_len );
865
// A zero length means this is the first character of the name: mark its start.
if( !attname_len ) attname = cpos;
872
case '/': // self closing
873
if( *(cpos+1) == '>' ) {
874
curnode = curnode->parent;
875
if( !curnode ) goto u_done;
880
// Only " and ' quoting are dispatched in the lines shown here.
case '"': cpos++; goto u_att_quot;
881
case 0x27: cpos++; goto u_att_quots; //'
882
case '>': cpos++; goto u_val_1;
883
case ' ': cpos++; goto u_att_eq1;
885
// A zero length means this is the first character of the value: mark its start.
if( !attval_len ) attval = cpos;
892
case '/': // self closing
893
if( *(cpos+1) == '>' ) {
894
curnode = curnode->parent;
895
if( !curnode ) goto u_done; // bad error condition
896
// Commit the accumulated value span to the current attribute.
curatt->value = attval;
897
curatt->vallen = attval_len;
904
curatt->value = attval;
905
curatt->vallen = attval_len;
910
curatt->value = attval;
911
curatt->vallen = attval_len;
917
if( !attval_len ) attval = cpos;
925
curatt->value = attval;
926
curatt->vallen = attval_len;
932
if( !attval_len ) attval = cpos;
938
if( *cpos == 0x27 ) { // '
940
curatt->value = attval;
941
curatt->vallen = attval_len;
947
if( !attval_len ) attval = cpos;
958
u_ename_x: // ending name
961
// Record the offset of cpos relative to xmlin, then move up to the parent.
curnode->z = cpos-xmlin;
962
curnode = curnode->parent; // jump up
963
if( !curnode ) goto u_done;
975
// NOTE(review): the format string has no conversion, so the extra *cpos
// argument is evaluated but ignored.
printf("done\n", *cpos);
978
// store the current state of the parser
979
self->last_state = last_state;
980
self->curnode = curnode;
981
self->curatt = curatt;
982
self->tagname = tagname; self->tagname_len = tagname_len;
983
self->attname = attname; self->attname_len = attname_len;
984
self->attval = attval; self->attval_len = attval_len;
985
self->att_has_val = att_has_val;
988
// NOTE(review): as with the "done" trace, the *cpos argument is unused.
printf("returning\n", *cpos);
998
// Creates a new node whose parent is self and links it in as self's last
// child. The name is stored as the pointer newname plus the length
// newnamelen; the characters are not copied. The return statement is on a
// line not shown (callers use the result as the new node).
struct nodec *nodec_addchildr( struct nodec *self, char *newname, int newnamelen ) {
999
struct nodec *newnode = new_nodecp( self );
1000
newnode->name = newname;
1001
newnode->namelen = newnamelen;
1002
// No children yet: the new node becomes both the first and the last child.
if( self->numchildren == 0 ) {
1003
self->firstchild = newnode;
1004
self->lastchild = newnode;
1005
self->numchildren++;
1009
// Otherwise link it after the current last child and move the tail pointer
// (presumably the else branch; the branch line itself is not shown).
self->lastchild->next = newnode;
1010
self->lastchild = newnode;
1011
self->numchildren++;
1016
// Creates a new attribute whose parent is self and links it in as self's last
// attribute. The name is stored as the pointer newname plus the length
// newnamelen; the characters are not copied. The numatt update and the return
// statement are on lines not shown.
struct attc *nodec_addattr( struct nodec *self, char *newname, int newnamelen ) {
1017
struct attc *newatt = new_attc( self );
1018
newatt->name = newname;
1019
newatt->namelen = newnamelen;
1021
// No attributes yet: the new one becomes both the first and the last.
if( !self->numatt ) {
1022
self->firstatt = newatt;
1023
self->lastatt = newatt;
1028
// Otherwise link it after the current last attribute and move the tail
// pointer (presumably the else branch; the branch line itself is not shown).
self->lastatt->next = newatt;
1029
self->lastatt = newatt;