20
struct nodec *curnode;
23
/* -------------------------------------------------------------------- */
27
static SV *xml_dequote_string(unsigned char *src, STRLEN src_len)
32
unsigned char c, c1, c2, c3, c4;
33
STRLEN src_len2, dst_len;
39
// calculate dequoted string length
40
while (src_len >= 3) {
48
/* We have "&", now look for:- &amp; &quot; &apos; &lt; &gt; */
52
if (c2 == ';' && c1 == 't' && (c == 'l' || c == 'g')) {
65
if (c == 'a' && c1 == 'm' && c2 == 'p' && c3 == ';') {
79
&& ((c == 'q' && c1 == 'u' && c2 == 'o' && c3 == 't') || (c == 'a' && c1 == 'p' && c2 == 'o' && c3 == 's'))) {
87
if (dst_len == src_len2) {
89
dstSV = newSVpv(src2, dst_len);
93
/* We have something to dequote, so make a SV to put it into */
94
dstSV = newSV(dst_len);
95
SvCUR_set(dstSV, dst_len);
99
while (src_len2 >= 3) { // 3 is min length of quoted symbol
110
// 1. test len=3: &lt; &gt;
111
if (c1 == 't' && c2 == ';') {
117
} else if (c == 'g') {
129
// 2. test len=4: &amp;
137
if (c == 'a' && c1 == 'm' && c2 == 'p' && c3 == ';') {
143
// 3. test len=5: &quot; &apos;
152
if (c == 'q' && c1 == 'u' && c2 == 'o' && c3 == 't') {
154
} else if (c == 'a' && c1 == 'p' && c2 == 'o' && c3 == 's') {
169
while (src_len2-- > 0) { // also copy trailing \0
176
/* -------------------------------------------------------------------- */
178
SV *node_val_unescaped(struct nodec * thisnode)
182
if (curnode->type == NODE_TYPE_ESCAPED)
183
sv = xml_dequote_string(curnode->value, curnode->vallen);
185
sv = newSVpvn(curnode->value, curnode->vallen);
192
/* -------------------------------------------------------------------- */
196
HV *output = newHV();
197
SV *outputref = newRV_noinc((SV *) output);
200
int numatts = curnode->numatt;
23
SV *cxml2obj( struct parserc *parser, struct nodec *curnode ) {
24
HV *output = newHV(); // the root
25
SV *outputref = newRV_noinc( (SV *) output ); // return a reference to the root
26
int i; // loop index; defined at the top because this is C
27
struct attc *curatt; // current attribute being worked with
28
int numatts = curnode->numatt; // total number of attributes on the current node
204
33
int length = curnode->numchildren;
205
SV *svi = newSViv(curnode->pos);
207
hv_store(output, "_pos", 4, svi, phash);
208
hv_store(output, "_i", 2, newSViv(curnode->name - rootpos), ihash);
209
hv_store(output, "_z", 2, newSViv(curnode->z), zhash);
211
if (curnode->vallen) {
212
SV *sv = node_val_unescaped(curnode);
213
hv_store(output, "value", 5, sv, vhash);
214
if (curnode->type & NODE_TYPE_CDATA) {
215
SV *svi = newSViv(1);
216
hv_store(output, "_cdata", 6, svi, cdhash);
219
if (curnode->comlen) {
220
SV *sv = newSVpvn(curnode->comment, curnode->comlen);
222
hv_store(output, "comment", 7, sv, chash);
225
if (curnode->vallen) {
226
SV *sv = node_val_unescaped(curnode);
227
hv_store(output, "value", 5, sv, vhash);
228
if (curnode->type & NODE_TYPE_CDATA) {
229
SV *svi = newSViv(1);
230
hv_store(output, "_cdata", 6, svi, cdhash);
233
if (curnode->comlen) {
234
SV *sv = newSVpvn(curnode->comment, curnode->comlen);
236
hv_store(output, "comment", 7, sv, chash);
34
SV *svi = newSViv( curnode->pos );
36
hv_store( output, "_pos", 4, svi, phash );
37
hv_store( output, "_i", 2, newSViv( curnode->name - rootpos ), ihash );
38
hv_store( output, "_z", 2, newSViv( curnode->z ), zhash );
41
printf("Node: %.*s\n", curnode->namelen, curnode->name );
44
// node without children
46
if( curnode->vallen ) {
47
SV * sv = newSVpvn( curnode->value, curnode->vallen );
49
hv_store( output, "value", 5, sv, vhash );
51
SV *svi = newSViv( 1 );
52
hv_store( output, "_cdata", 6, svi, cdhash );
55
if( curnode->comlen ) {
56
SV * sv = newSVpvn( curnode->comment, curnode->comlen );
58
hv_store( output, "comment", 7, sv, chash );
64
if( curnode->vallen ) {
65
SV *sv = newSVpvn( curnode->value, curnode->vallen );
67
hv_store( output, "value", 5, sv, vhash );
69
SV *svi = newSViv( 1 );
70
hv_store( output, "_cdata", 6, svi, cdhash );
73
if( curnode->comlen ) {
74
SV *sv = newSVpvn( curnode->comment, curnode->comlen );
76
hv_store( output, "comment", 7, sv, chash );
79
// loop through child nodes
239
80
curnode = curnode->firstchild;
240
for (i = 0; i < length; i++) {
241
SV *key = newSVpv(curnode->name, curnode->namelen);
243
HE *curh = hv_fetch_ent(output, key, 0, 0);
245
if (curnode->namelen > 6) {
246
if (!strncmp(curnode->name, "multi_", 6)) {
247
SV *subkey = newSVpv(&curnode->name[6], curnode->namelen - 6);
249
HE *oldh = hv_fetch_ent(output, subkey, 0, 0);
81
for( i = 0; i < length; i++ ) {
82
SV **cur = hv_fetch( output, curnode->name, curnode->namelen, 0 );
84
// check for multi_[name] nodes
85
if( curnode->namelen > 6 ) {
86
if( !strncmp( curnode->name, "multi_", 6 ) ) {
87
char *subname = &curnode->name[6];
88
int subnamelen = curnode->namelen-6;
89
SV **old = hv_fetch( output, subname, subnamelen, 0 );
250
90
AV *newarray = newAV();
251
SV *newarrayref = newRV_noinc((SV *) newarray);
253
hv_store_ent(output, subkey, newarrayref, 0);
255
SV *old = HeVAL(oldh);
256
if (SvTYPE(SvRV(old)) == SVt_PVHV) { // check for hash ref
257
SV *newref = newRV((SV *) SvRV(old));
258
hv_delete_ent(output, subkey, 0, 0);
259
hv_store_ent(output, subkey, newarrayref, 0);
260
av_push(newarray, newref);
91
SV *newarrayref = newRV_noinc( (SV *) newarray );
93
hv_store( output, subname, subnamelen, newarrayref, 0 );
96
if( SvTYPE( SvRV(*old) ) == SVt_PVHV ) { // check for hash ref
97
SV *newref = newRV( (SV *) SvRV(*old) );
98
hv_delete( output, subname, subnamelen, 0 );
99
hv_store( output, subname, subnamelen, newarrayref, 0 );
100
av_push( newarray, newref );
263
SvREFCNT_dec(subkey); // no longer need the subkey
268
hv_store_ent(output, key, cxml2obj(), 0);
270
SV *cur = HeVAL(curh);
271
if (SvTYPE(SvRV(cur)) == SVt_PVHV) {
107
SV *ob = cxml2obj( parser, curnode );
108
hv_store( output, curnode->name, curnode->namelen, ob, 0 );
110
else { // there is already a node stored with this name
111
cur_type = SvTYPE( SvRV( *cur ) );
112
if( cur_type == SVt_PVHV ) { // sub value is a hash; must be anode
272
113
AV *newarray = newAV();
273
SV *newarrayref = newRV_noinc((SV *) newarray);
274
SV *newref = newRV((SV *) SvRV(cur));
275
hv_delete_ent(output, key, 0, 0);
276
hv_store_ent(output, key, newarrayref, 0);
277
av_push(newarray, newref);
278
av_push(newarray, cxml2obj());
280
AV *av = (AV *) SvRV(cur);
281
av_push(av, cxml2obj());
114
SV *newarrayref = newRV_noinc( (SV *) newarray );
115
SV *newref = newRV( (SV *) SvRV( *cur ) );
117
hv_delete( output, curnode->name, curnode->namelen, 0 );
118
hv_store( output, curnode->name, curnode->namelen, newarrayref, 0 );
119
av_push( newarray, newref );
120
ob = cxml2obj( parser, curnode );
121
av_push( newarray, ob );
123
else if( cur_type == SVt_PVAV ) {
124
AV *av = (AV *) SvRV( *cur );
125
SV *ob = cxml2obj( parser, curnode );
129
// something else; probably an existing value node; just wipe it out
130
SV *ob = cxml2obj( parser, curnode );
131
hv_store( output, curnode->name, curnode->namelen, ob, 0 );
284
if (i != (length - 1))
285
curnode = curnode->next;
286
SvREFCNT_dec(key); // no longer need the key
134
if( i != ( length - 1 ) ) curnode = curnode->next;
289
137
curnode = curnode->parent;
293
141
curatt = curnode->firstatt;
294
for (i = 0; i < numatts; i++) {
142
for( i = 0; i < numatts; i++ ) {
295
143
HV *atth = newHV();
296
SV *atthref = newRV_noinc((SV *) atth);
297
hv_store(output, curatt->name, curatt->namelen, atthref, 0);
299
attval = newSVpvn(curatt->value, curatt->vallen);
144
SV *atthref = newRV_noinc( (SV *) atth );
145
hv_store( output, curatt->name, curatt->namelen, atthref, 0 );
147
if( curatt->value == NULL ) attval = newSVpvn( "1", 1 );
148
else attval = newSVpvn( curatt->value, curatt->vallen );
300
149
SvUTF8_on(attval);
301
hv_store(atth, "value", 5, attval, vhash);
303
hv_store(atth, "_att", 4, attatt, ahash);
304
if (i != (numatts - 1))
305
curatt = curatt->next;
150
hv_store( atth, "value", 5, attval, vhash );
151
attatt = newSViv( 1 );
152
hv_store( atth, "_att", 4, attatt, ahash );
153
if( i != ( numatts - 1 ) ) curatt = curatt->next;
308
156
return outputref;
311
/* -------------------------------------------------------------------- */
313
SV *cxml2obj_simple()
159
SV *cxml2obj_simple( struct parserc *parser, struct nodec *curnode ) {
316
161
struct attc *curatt;
317
162
int numatts = curnode->numatt;
325
168
int length = curnode->numchildren;
326
if ((length + numatts) == 0) {
328
return node_val_unescaped(curnode);
330
return newSVpv("", 0); // an empty tag has empty string content
169
if( ( length + numatts ) == 0 ) {
170
if( curnode->vallen ) {
171
SV * sv = newSVpvn( curnode->value, curnode->vallen );
175
return newSVpvn( "", 0 );
333
178
output = newHV();
334
outputref = newRV_noinc((SV *) output);
179
outputref = newRV_noinc( (SV *) output );
337
182
curnode = curnode->firstchild;
338
for (i = 0; i < length; i++) {
339
SV *key = newSVpv(curnode->name, curnode->namelen);
341
HE *curh = hv_fetch_ent(output, key, 0, 0);
343
if (curnode->namelen > 6) {
344
if (!strncmp(curnode->name, "multi_", 6)) {
345
SV *subkey = newSVpv(&curnode->name[6], curnode->namelen - 6);
347
HE *oldh = hv_fetch_ent(output, subkey, 0, 0);
183
for( i = 0; i < length; i++ ) {
184
SV *namesv = newSVpvn( curnode->name, curnode->namelen );
187
SV **cur = hv_fetch( output, curnode->name, curnode->namelen, 0 );
189
if( curnode->namelen > 6 ) {
190
if( !strncmp( curnode->name, "multi_", 6 ) ) {
191
char *subname = &curnode->name[6];
192
int subnamelen = curnode->namelen-6;
193
SV **old = hv_fetch( output, subname, subnamelen, 0 );
348
194
AV *newarray = newAV();
349
SV *newarrayref = newRV_noinc((SV *) newarray);
351
hv_store_ent(output, subkey, newarrayref, 0);
353
SV *old = HeVAL(oldh);
354
if (SvTYPE(SvRV(old)) == SVt_PVHV) { // check for hash ref
355
SV *newref = newRV((SV *) SvRV(old));
356
hv_delete_ent(output, subkey, 0, 0);
357
hv_store_ent(output, subkey, newarrayref, 0);
358
av_push(newarray, newref);
195
SV *newarrayref = newRV_noinc( (SV *) newarray );
197
hv_store( output, subname, subnamelen, newarrayref, 0 );
200
if( SvTYPE( SvRV(*old) ) == SVt_PVHV ) { // check for hash ref
201
SV *newref = newRV_noinc( (SV *) SvRV(*old) );
202
hv_delete( output, subname, subnamelen, 0 );
203
hv_store( output, subname, subnamelen, newarrayref, 0 );
204
av_push( newarray, newref );
361
SvREFCNT_dec(subkey); // no longer need the subkey
366
hv_store_ent(output, key, cxml2obj_simple(), 0);
368
SV *cur = HeVAL(curh);
370
if (SvTYPE(SvRV(cur)) == SVt_PVHV) {
211
SV *ob = cxml2obj_simple( parser, curnode );
212
hv_store( output, curnode->name, curnode->namelen, ob, 0 );
215
if( SvROK( *cur ) ) {
216
if( SvTYPE( SvRV(*cur) ) == SVt_PVHV ) {
371
217
AV *newarray = newAV();
372
SV *newarrayref = newRV_noinc((SV *) newarray);
373
SV *newref = newRV((SV *) SvRV(cur));
374
hv_delete_ent(output, key, 0, 0);
375
hv_store_ent(output, key, newarrayref, 0);
376
av_push(newarray, newref);
377
av_push(newarray, cxml2obj_simple());
379
AV *av = (AV *) SvRV(cur);
380
av_push(av, cxml2obj_simple());
218
SV *newarrayref = newRV_noinc( (SV *) newarray );
219
SV *newref = newRV( (SV *) SvRV( *cur ) );
220
hv_delete( output, curnode->name, curnode->namelen, 0 );
221
hv_store( output, curnode->name, curnode->namelen, newarrayref, 0 );
222
av_push( newarray, newref );
223
av_push( newarray, cxml2obj_simple( parser, curnode ) );
226
AV *av = (AV *) SvRV( *cur );
227
av_push( av, cxml2obj_simple( parser, curnode ) );
383
231
AV *newarray = newAV();
384
SV *newarrayref = newRV_noinc((SV *) newarray);
232
SV *newarrayref = newRV( (SV *) newarray );
387
char *ptr = SvPV(cur, len);
388
SV *newsv = newSVpvn(ptr, len);
235
char *ptr = SvPV(*cur, len);
236
SV *newsv = newSVpvn( ptr, len );
389
237
SvUTF8_on(newsv);
391
av_push(newarray, newsv);
392
hv_delete_ent(output, key, 0, 0);
393
hv_store_ent(output, key, newarrayref, 0);
394
av_push(newarray, cxml2obj_simple());
239
av_push( newarray, newsv );
240
hv_delete( output, curnode->name, curnode->namelen, 0 );
241
hv_store( output, curnode->name, curnode->namelen, newarrayref, 0 );
242
av_push( newarray, cxml2obj_simple( parser, curnode ) );
397
if (i != (length - 1))
398
curnode = curnode->next;
399
SvREFCNT_dec(key); // no longer need the key
245
if( i != ( length - 1 ) ) curnode = curnode->next;
401
247
curnode = curnode->parent;
403
SV *sv = node_val_unescaped(curnode);
404
hv_store(output, "content", 7, sv, vhash);
250
if( curnode->type ) { // store cdata value under content, even if empty or spaces
251
SV * sv = newSVpvn( curnode->value, curnode->vallen );
253
hv_store( output, "content", 7, sv, content_hash );
257
for( i=0;i<curnode->vallen;i++ ) {
258
char let = curnode->value[ i ];
259
if( let != ' ' && let != 0x0d && let != 0x0a ) {
265
SV * sv = newSVpvn( curnode->value, curnode->vallen );
267
hv_store( output, "content", 7, sv, content_hash );
408
273
curatt = curnode->firstatt;
409
for (i = 0; i < numatts; i++) {
410
attval = newSVpvn(curatt->value, curatt->vallen);
274
for( i = 0; i < numatts; i++ ) {
275
if( curatt->value == NULL ) attval = newSVpvn( "1", 1 );
276
else attval = newSVpvn( curatt->value, curatt->vallen );
411
277
SvUTF8_on(attval);
412
hv_store(output, curatt->name, curatt->namelen, attval, 0);
413
if (i != (numatts - 1))
414
curatt = curatt->next;
278
hv_store( output, curatt->name, curatt->namelen, attval, 0 );
279
if( i != ( numatts - 1 ) ) curatt = curatt->next;
418
283
return outputref;
421
/* -------------------------------------------------------------------- */
424
// Indent and XS declarations do not mix well :-(
287
PERL_HASH(vhash, "value", 5);
288
PERL_HASH(ahash, "_att", 4);
289
PERL_HASH(chash, "comment", 7);
290
PERL_HASH(phash, "_pos", 4);
291
PERL_HASH(ihash, "_i", 2 );
292
PERL_HASH(zhash, "_z", 2 );
293
PERL_HASH(cdhash, "_cdata", 6 );
426
296
MODULE = XML::Bare PACKAGE = XML::Bare
431
if( root->err ) RETVAL = newSViv( root->err );
302
struct parserc *parser;
303
parser = INT2PTR( struct parserc *, SvUV( parsersv ) );
304
if( parser->err ) RETVAL = newSViv( parser->err );
306
RETVAL = cxml2obj( parser, parser->rootnode );
307
//printf("refcnt: %i\n", SvREFCNT( RETVAL ) );
442
PERL_HASH(vhash, "content", 7);
444
RETVAL = cxml2obj_simple();
313
xml2obj_simple( parsersv )
316
PERL_HASH( content_hash, "content", 7 );
317
struct parserc *parser;
318
parser = INT2PTR( struct parserc *, SvUV( parsersv ) );
319
if( parser->err ) RETVAL = newSViv( parser->err );
321
RETVAL = cxml2obj_simple( parser, parser->rootnode );
322
//printf("refcnt: %i\n", SvREFCNT( RETVAL ) );
328
c_parse_more( text, parsersv )
332
struct parserc *parser = INT2PTR( struct parserc *, SvUV( parsersv ) );
333
int err = parserc_parse( parser, text );
334
RETVAL = newSVuv( PTR2UV( parser ) );
453
PERL_HASH(vhash, "value", 5);
454
PERL_HASH(ahash, "_att", 4);
455
PERL_HASH(chash, "comment", 7);
456
PERL_HASH(phash, "_pos", 4);
457
PERL_HASH(ihash, "_i", 2 );
458
PERL_HASH(zhash, "_z", 2 );
459
PERL_HASH(cdhash, "_cdata", 6 );
460
root = parserc_parse( text );
344
struct parserc *parser = (struct parserc *) malloc( sizeof( struct parserc ) );
345
parser->last_state = 0;
346
int err = parserc_parse( parser, text );
347
RETVAL = newSVuv( PTR2UV( parser ) );
352
c_parse_unsafely(text)
357
struct parserc *parser = (struct parserc *) malloc( sizeof( struct parserc ) );
358
parser->last_state = 0;
359
int err = parserc_parse_unsafely( parser, text );
360
RETVAL = newSVuv( PTR2UV( parser ) );
463
365
c_parsefile(filename)