1
Description: Move C files to ext/
2
This is required to avoid the double-installation of files in lib/ by
3
extconf.rb (see #630641)
7
+++ ruby-xmlparser-0.7.2/ext/encoding.h
9
+/*****************************************************************
12
+** Copyright 1998 Clark Cooper
13
+** All rights reserved.
15
+** This program is free software; you can redistribute it and/or
16
+** modify it under the same terms as Perl itself.
22
+#define ENCMAP_MAGIC 0xfeebface
24
+typedef struct prefixmap {
26
+ unsigned char len; /* 0 => 256 */
27
+ unsigned short bmap_start;
28
+ unsigned char ispfx[32];
29
+ unsigned char ischar[32];
32
+typedef struct encinf
34
+ unsigned short prefixes_size;
35
+ unsigned short bytemap_size;
37
+ PrefixMap *prefixes;
38
+ unsigned short *bytemap;
41
+typedef struct encmaphdr
45
+ unsigned short pfsize;
46
+ unsigned short bmsize;
50
+/*================================================================
51
+** Structure of Encoding map binary encoding
53
+** Note that all shorts and ints are in network order,
54
+** so when packing or unpacking with perl, use 'n' and 'N' respectively.
55
+** In C, use the htonl family of functions.
57
+** The basic structure is:
59
+** _______________________
60
+** |Header (including map expat needs for 1st byte)
61
+** |PrefixMap * pfsize
62
+** | This section isn't included for single-byte encodings.
63
+** | For multiple byte encodings, when a byte represents a prefix
64
+** | then it indexes into this vector instead of mapping to a
65
+** | Unicode character. The PrefixMap type is declared above. The
66
+** | ispfx and ischar fields are bitvectors indicating whether
67
+** | the byte being mapped is a prefix or character respectively.
68
+** | If neither is set, then the character is not mapped to Unicode.
70
+** | The min field is the 1st byte mapped for this prefix; the
71
+** | len field is the number of bytes mapped; and bmap_start is
72
+** | the starting index of the map for this prefix in the overall
73
+** | map (next section).
74
+** |unsigned short * bmsize
75
+** | This section also is omitted for single-byte encodings.
76
+** | Each short is either a Unicode scalar or an index into the
77
+** | PrefixMap vector.
79
+** The header for these files is declared above as the Encmap_Header type.
80
+** The magic field is a magic number which should match the ENCMAP_MAGIC
81
+** macro above. The next 40 bytes store the IANA registered name for the
82
+** encoding. The pfsize field holds the number of PrefixMaps, which should
83
+** be zero for single byte encodings. The bmsize field holds the number of
84
+** shorts used for the overall map.
86
+** The map field contains either the Unicode scalar encoded by the 1st byte
87
+** or -n where n is the number of bytes that such a 1st byte implies (Expat
88
+** requires that the number of bytes to encode a character is indicated by
89
+** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
91
+** If the encoding is a multiple byte encoding, then there will be PrefixMap
92
+** and character map sections. The 1st PrefixMap (index 0), covers a range
93
+** of bytes that includes all 1st byte prefixes.
95
+** Look at convert_to_unicode in Expat.xs to see how this data structure
99
+#endif /* ndef ENCODING_H */
101
+++ ruby-xmlparser-0.7.2/ext/extconf.rb
105
+# --with-perl-enc-map[=/path/to/enc-map]
106
+# --with-expat-dir=/path/to/expat
107
+# --with-expat-lib=/path/to/expat/lib
108
+# --with-expat-include=/path/to/expat/include
113
+perl= ENV['PERL'] || 'perl'
115
+## Encoding maps may be stored in $perl_archlib/XML/Parser/Encodings/
116
+#perl_archlib = '/usr/lib/perl5/site_perl/5.005/i586-linux'
117
+#perl_archlib = '/usr/local/lib'
118
+perl_archlib = `#{perl} -e 'use Config; print $Config{"archlib"}'`
119
+xml_enc_path = with_config("perl-enc-map")
120
+if xml_enc_path == true
121
+ xml_enc_path = perl_archlib + "/XML/Parser/Encodings"
124
+##$CFLAGS="-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok" +
125
+## ' -DXML_ENC_PATH=getenv\(\"XML_ENC_PATH\"\)' +
127
+#$CFLAGS = "-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok"
128
+#$LDFLAGS = "-L#{cwd}/expat/xmlparse -Wl,-rpath,/usr/local/lib"
129
+#$LDFLAGS = "-L#{cwd}/expat/xmlparse"
131
+#dir_config("xmltok")
132
+#dir_config("xmlparse")
134
+ $CFLAGS += " -DXML_ENC_PATH=\\\"#{xml_enc_path}\\\""
137
+#if have_header("xmlparse.h") || have_header("expat.h")
138
+if have_header("expat.h") || have_header("xmlparse.h")
139
+ if have_library("expat", "XML_ParserCreate") ||
140
+ have_library("xmltok", "XML_ParserCreate")
141
+ if have_func("XML_SetNotStandaloneHandler")
142
+ $CFLAGS += " -DNEW_EXPAT"
144
+ if have_func("XML_SetParamEntityParsing")
145
+ $CFLAGS += " -DXML_DTD"
147
+# if have_func("XML_SetExternalParsedEntityDeclHandler")
148
+# $CFLAGS += " -DEXPAT_1_2"
150
+ have_func("XML_SetDoctypeDeclHandler")
151
+ have_func("XML_ParserReset")
152
+ have_func("XML_SetSkippedEntityHandler")
153
+ have_func("XML_GetFeatureList")
154
+ have_func("XML_UseForeignDTD")
155
+ have_func("XML_GetIdAttributeIndex")
156
+ have_library("socket", "ntohl")
157
+ have_library("wsock32") if RUBY_PLATFORM =~ /mswin32|mingw/
158
+ create_makefile("xmlparser")
162
+++ ruby-xmlparser-0.7.2/ext/xmlparser.c
165
+ * Expat (XML Parser Toolkit) wrapper for Ruby
166
+ * Dec 15, 2009 yoshidam version 0.7.0 support Ruby 1.9.1
167
+ * Feb 16, 2004 yoshidam version 0.6.8 taint output string
168
+ * Feb 16, 2004 yoshidam version 0.6.7 fix buffer overflow
169
+ * Mar 11, 2003 yoshidam version 0.6.6 fix skippedEntity handler
170
+ * Sep 20, 2002 yoshidam version 0.6.5 fix reset method
171
+ * Apr 4, 2002 yoshidam version 0.6.3 change event code values
172
+ * Oct 10, 2000 yoshidam version 0.6.1 support expat-1.2
173
+ * Oct 6, 2000 yoshidam version 0.6.0 support expat-1.95.0
174
+ * Jun 28, 1999 yoshidam version 0.5.18 define initialize for Ruby 1.5
175
+ * Jun 28, 1999 yoshidam version 0.5.15 support start/endDoctypeDecl
176
+ * Jun 28, 1999 yoshidam version 0.5.14 support setParamEntityParsing
177
+ * Apr 28, 1999 yoshidam version 0.5.11 support notStandalone
178
+ * Mar 29, 1998 yoshidam version 0.5.9 optimize for Ruby 1.3
179
+ * Mar 8, 1998 yoshidam version 0.5.7 support start/endNamespaceDecl
180
+ * Jan 14, 1998 yoshidam version 0.5.4 support start/endCdataSection
181
+ * Jan 10, 1998 yoshidam version 0.5.3 support encoding map
182
+ * Nov 24, 1998 yoshidam version 0.5.0 support TEST version of expat
183
+ * Nov 5, 1998 yoshidam version 0.4.18 mIDs are initialized in Init_xmlparser
184
+ * Oct 28, 1998 yoshidam version 0.4.17 mIDs are stored into static vars
185
+ * Oct 13, 1998 yoshidam version 0.4.12 debug and speed up myEncodingConv
186
+ * Oct 7, 1998 yoshidam version 0.4.11 hold internal object into ivar
187
+ * Sep 18, 1998 yoshidam version 0.4.6
188
+ * Sep 8, 1998 yoshidam version 0.4.4
189
+ * Sep 3, 1998 yoshidam version 0.4.3
190
+ * Sep 1, 1998 yoshidam version 0.4.2
191
+ * Aug 28, 1998 yoshidam version 0.4.1
192
+ * Aug 22, 1998 yoshidam version 0.4.0
193
+ * Jul 6, 1998 yoshidam version 0.2
194
+ * Jun 30, 1998 yoshidam version 0.1
196
+ * XML_ENC_PATH: path of encoding map for Perl
197
+ * HAVE_XML_USEFOREIGNDTD: expat 1.95.5
198
+ * HAVE_XML_GETFEATURELIST: expat 1.95.5
199
+ * HAVE_XML_SETSKIPPEDENTITYHANDLER: expat 1.95.4
200
+ * HAVE_XML_PARSERRESET: expat 1.95.3
201
+ * HAVE_EXPAT_H: expat 1.95.0
202
+ * HAVE_XML_SETDOCTYPEDECLHANDLER: expat 19990728
203
+ * XML_DTD: expat 19990626
204
+ * NEW_EXPAT: expat 1.1
208
+#ifdef HAVE_RUBY_IO_H
209
+# include "ruby/io.h"
211
+# include "rubyio.h"
218
+# include "xmlparse.h"
221
+# include <limits.h>
222
+# include <sys/stat.h>
223
+# include "encoding.h"
225
+# define PATH_MAX 256
230
+# define RSTRING_PTR(s) (RSTRING(s)->ptr)
231
+# define RSTRING_LEN(s) (RSTRING(s)->len)
234
+#ifdef HAVE_RUBY_ENCODING_H
235
+static rb_encoding* enc_xml;
238
+static VALUE eXMLParserError;
239
+static VALUE cXMLParser;
240
+static VALUE cXMLEncoding;
242
+static ID id_startElementHandler;
243
+static ID id_endElementHandler;
244
+static ID id_characterDataHandler;
245
+static ID id_processingInstructionHandler;
246
+static ID id_defaultHandler;
247
+static ID id_defaultExpandHandler;
248
+static ID id_unparsedEntityDeclHandler;
249
+static ID id_notationDeclHandler;
250
+static ID id_externalEntityRefHandler;
251
+static ID id_unknownEncoding;
252
+static ID id_convert;
254
+static ID id_commentHandler;
255
+static ID id_startCdataSectionHandler;
256
+static ID id_endCdataSectionHandler;
257
+static ID id_startNamespaceDeclHandler;
258
+static ID id_endNamespaceDeclHandler;
259
+static ID id_notStandaloneHandler;
261
+#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
262
+static ID id_startDoctypeDeclHandler;
263
+static ID id_endDoctypeDeclHandler;
266
+static ID id_elementDeclHandler;
267
+static ID id_attlistDeclHandler;
268
+static ID id_xmlDeclHandler;
269
+static ID id_entityDeclHandler;
272
+static ID id_externalParsedEntityDeclHandler;
273
+static ID id_internalParsedEntityDeclHandler;
275
+#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
276
+static ID id_skippedEntityHandler;
279
+#define GET_PARSER(obj, parser) \
280
+ Data_Get_Struct(obj, XMLParser, parser)
282
+typedef struct _XMLParser {
285
+ int defaultCurrent;
287
+ const XML_Char** lastAttrs;
292
+ const XML_Char *detectedEncoding;
295
+static VALUE symDEFAULT;
296
+static VALUE symSTART_ELEM;
297
+static VALUE symEND_ELEM;
298
+static VALUE symCDATA;
300
+static VALUE symUNPARSED_ENTITY_DECL;
301
+static VALUE symNOTATION_DECL;
302
+static VALUE symEXTERNAL_ENTITY_REF;
304
+static VALUE symCOMMENT;
305
+static VALUE symSTART_CDATA;
306
+static VALUE symEND_CDATA;
307
+static VALUE symSTART_NAMESPACE_DECL;
308
+static VALUE symEND_NAMESPACE_DECL;
310
+#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
311
+static VALUE symSTART_DOCTYPE_DECL;
312
+static VALUE symEND_DOCTYPE_DECL;
315
+static VALUE symELEMENT_DECL;
316
+static VALUE symATTLIST_DECL;
317
+static VALUE symXML_DECL;
318
+static VALUE symENTITY_DECL;
321
+static VALUE symEXTERNAL_PARSED_ENTITY_DECL;
322
+static VALUE symINTERNAL_PARSED_ENTITY_DECL;
325
+static VALUE symUNKNOWN_ENCODING;
327
+#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
328
+static VALUE symSKIPPED_ENTITY;
333
+XMLParser_free(XMLParser* parser)
335
+ /* fprintf(stderr, "Delete XMLParser: %p->%p\n", parser, parser->parser);*/
336
+ if (parser->parser) {
337
+ XML_ParserFree(parser->parser);
338
+ parser->parser = NULL;
344
+XMLParser_mark(XMLParser* parser)
346
+ /* fprintf(stderr, "Mark XMLParser: %p->%p\n", parser, parser->parser);*/
347
+ if (!NIL_P(parser->parent)) {
349
+ GET_PARSER(parser->parent, parent);
350
+ rb_gc_mark(parser->parent);
355
+taintParser(XMLParser* parser) {
356
+ parser->tainted |= 1;
357
+ if (!NIL_P(parser->parent) && !parser->context) {
359
+ GET_PARSER(parser->parent, parent);
360
+ taintParser(parent);
365
+taintObject(XMLParser* parser, VALUE obj) {
366
+ if (parser->tainted)
370
+#define TO_(o) (taintObject(parser, o))
373
+freezeObject(VALUE obj) {
377
+#define FO_(o) (freezeObject(o))
379
+#ifdef HAVE_RUBY_ENCODING_H
380
+# define ENC_(o) (rb_enc_associate(o, enc_xml))
382
+# define ENC_(o) (o)
386
+/* Event handlers for iterator */
388
+iterStartElementHandler(void *recv,
389
+ const XML_Char *name, const XML_Char **atts)
394
+ GET_PARSER(recv, parser);
396
+ parser->lastAttrs = atts;
398
+ attrhash = rb_hash_new();
400
+ const char* key = *atts++;
401
+ const char* val = *atts++;
402
+ rb_hash_aset(attrhash,
403
+ FO_(TO_(ENC_(rb_str_new2((char*)key)))),
404
+ TO_(ENC_(rb_str_new2((char*)val))));
407
+ rb_yield(rb_ary_new3(4, symSTART_ELEM,
408
+ TO_(ENC_(rb_str_new2((char*)name))), attrhash, recv));
409
+ if (parser->defaultCurrent) {
410
+ parser->defaultCurrent = 0;
411
+ XML_DefaultCurrent(parser->parser);
416
+iterEndElementHandler(void *recv,
417
+ const XML_Char *name)
420
+ GET_PARSER(recv, parser);
421
+ rb_yield(rb_ary_new3(4, symEND_ELEM,
422
+ TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
423
+ if (parser->defaultCurrent) {
424
+ parser->defaultCurrent = 0;
425
+ XML_DefaultCurrent(parser->parser);
430
+iterCharacterDataHandler(void *recv,
435
+ GET_PARSER(recv, parser);
436
+ rb_yield(rb_ary_new3(4, symCDATA,
437
+ Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
438
+ if (parser->defaultCurrent) {
439
+ parser->defaultCurrent = 0;
440
+ XML_DefaultCurrent(parser->parser);
445
+iterProcessingInstructionHandler(void *recv,
446
+ const XML_Char *target,
447
+ const XML_Char *data)
450
+ GET_PARSER(recv, parser);
451
+ rb_yield(rb_ary_new3(4, symPI,
452
+ TO_(ENC_(rb_str_new2((char*)target))),
453
+ TO_(ENC_(rb_str_new2((char*)data))), recv));
454
+ if (parser->defaultCurrent) {
455
+ parser->defaultCurrent = 0;
456
+ XML_DefaultCurrent(parser->parser);
461
+iterDefaultHandler(void *recv,
466
+ GET_PARSER(recv, parser);
467
+ rb_yield(rb_ary_new3(4, symDEFAULT,
468
+ Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
469
+ if (parser->defaultCurrent) {
470
+ parser->defaultCurrent = 0;
471
+ /* XML_DefaultCurrent should not be called in defaultHandler */
472
+ /* XML_DefaultCurrent(parser->parser); */
477
+iterUnparsedEntityDeclHandler(void *recv,
478
+ const XML_Char *entityName,
479
+ const XML_Char *base,
480
+ const XML_Char *systemId,
481
+ const XML_Char *publicId,
482
+ const XML_Char *notationName)
487
+ GET_PARSER(recv, parser);
488
+ valary = rb_ary_new3(4, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
489
+ TO_(ENC_(rb_str_new2((char*)systemId))),
490
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
491
+ TO_(ENC_(rb_str_new2((char*)notationName))));
492
+ rb_yield(rb_ary_new3(4, symUNPARSED_ENTITY_DECL,
493
+ TO_(ENC_(rb_str_new2((char*)entityName))),
495
+ if (parser->defaultCurrent) {
496
+ parser->defaultCurrent = 0;
497
+ XML_DefaultCurrent(parser->parser);
502
+iterNotationDeclHandler(void *recv,
503
+ const XML_Char *notationName,
504
+ const XML_Char *base,
505
+ const XML_Char *systemId,
506
+ const XML_Char *publicId)
511
+ GET_PARSER(recv, parser);
512
+ valary = rb_ary_new3(3,
513
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
514
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
515
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
516
+ rb_yield(rb_ary_new3(4, symNOTATION_DECL,
517
+ TO_(ENC_(rb_str_new2((char*)notationName))),
519
+ if (parser->defaultCurrent) {
520
+ parser->defaultCurrent = 0;
521
+ XML_DefaultCurrent(parser->parser);
526
+iterExternalEntityRefHandler(XML_Parser xmlparser,
527
+ const XML_Char *context,
528
+ const XML_Char *base,
529
+ const XML_Char *systemId,
530
+ const XML_Char *publicId)
537
+ recv = (VALUE)XML_GetUserData(xmlparser);
538
+ GET_PARSER(recv, parser);
539
+ valary = rb_ary_new3(3,
540
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
541
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
542
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
543
+ ret = rb_yield(rb_ary_new3(4, symEXTERNAL_ENTITY_REF,
544
+ (context ? TO_(ENC_(rb_str_new2((char*)context))) : Qnil),
546
+ if (parser->defaultCurrent) {
547
+ parser->defaultCurrent = 0;
548
+ XML_DefaultCurrent(parser->parser);
550
+ /* The error status in this iterator block should be returned
551
+ by the exception. */
557
+iterCommentHandler(void *recv,
561
+ GET_PARSER(recv, parser);
562
+ rb_yield(rb_ary_new3(4, symCOMMENT,
563
+ Qnil, TO_(ENC_(rb_str_new2((char*)s))), recv));
564
+ if (parser->defaultCurrent) {
565
+ parser->defaultCurrent = 0;
566
+ XML_DefaultCurrent(parser->parser);
571
+iterStartCdataSectionHandler(void *recv)
574
+ GET_PARSER(recv, parser);
575
+ rb_yield(rb_ary_new3(4, symSTART_CDATA, Qnil, Qnil, recv));
576
+ if (parser->defaultCurrent) {
577
+ parser->defaultCurrent = 0;
578
+ XML_DefaultCurrent(parser->parser);
583
+iterEndCdataSectionHandler(void *recv)
586
+ GET_PARSER(recv, parser);
587
+ rb_yield(rb_ary_new3(4, symEND_CDATA, Qnil, Qnil, recv));
588
+ if (parser->defaultCurrent) {
589
+ parser->defaultCurrent = 0;
590
+ XML_DefaultCurrent(parser->parser);
595
+iterStartNamespaceDeclHandler(void *recv,
596
+ const XML_Char *prefix,
597
+ const XML_Char *uri)
600
+ GET_PARSER(recv, parser);
601
+ rb_yield(rb_ary_new3(4, symSTART_NAMESPACE_DECL,
602
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
603
+ (uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil), recv));
604
+ if (parser->defaultCurrent) {
605
+ parser->defaultCurrent = 0;
606
+ XML_DefaultCurrent(parser->parser);
611
+iterEndNamespaceDeclHandler(void *recv,
612
+ const XML_Char *prefix)
615
+ GET_PARSER(recv, parser);
616
+ rb_yield(rb_ary_new3(4, symEND_NAMESPACE_DECL,
617
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
619
+ if (parser->defaultCurrent) {
620
+ parser->defaultCurrent = 0;
621
+ XML_DefaultCurrent(parser->parser);
626
+#ifdef HAVE_XML_SETPARAMENTITYPARSING
629
+iterStartDoctypeDeclHandler(void *recv,
630
+ const XML_Char *doctypeName,
631
+ const XML_Char *sysid,
632
+ const XML_Char *pubid,
633
+ int has_internal_subset)
635
+iterStartDoctypeDeclHandler(void *recv,
636
+ const XML_Char *doctypeName)
640
+ VALUE valary = Qnil;
642
+ GET_PARSER(recv, parser);
644
+ valary = rb_ary_new3(3,
645
+ (sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
646
+ (pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
647
+ (has_internal_subset ? Qtrue : Qfalse));
649
+ rb_yield(rb_ary_new3(4, symSTART_DOCTYPE_DECL,
650
+ TO_(ENC_(rb_str_new2((char*)doctypeName))),
652
+ if (parser->defaultCurrent) {
653
+ parser->defaultCurrent = 0;
654
+ XML_DefaultCurrent(parser->parser);
659
+iterEndDoctypeDeclHandler(void *recv)
662
+ GET_PARSER(recv, parser);
663
+ rb_yield(rb_ary_new3(4, symEND_DOCTYPE_DECL,
666
+ if (parser->defaultCurrent) {
667
+ parser->defaultCurrent = 0;
668
+ XML_DefaultCurrent(parser->parser);
677
+makeContentArray(XMLParser* parser, XML_Content* model)
679
+ static const char* content_type_name[] = {
680
+ NULL, "EMPTY", "ANY", "MIXED", "NAME", "CHOICE", "SEQ"
682
+ static const char* content_quant_name[] = {
686
+ VALUE children = Qnil;
687
+ const char* type_name = content_type_name[model->type];
688
+ const char* quant_name = content_quant_name[model->quant];
689
+ VALUE ret = rb_ary_new3(3,
690
+ TO_(ENC_(rb_str_new2((char*)type_name))),
691
+ TO_(ENC_(rb_str_new2((char*)quant_name))),
692
+ (model->name ? TO_(ENC_(rb_str_new2((char*)model->name))) :
694
+ if (model->numchildren > 0) {
695
+ children = rb_ary_new();
696
+ for (i = 0; i < model->numchildren; i++) {
697
+ VALUE child = makeContentArray(parser, model->children + i);
698
+ rb_ary_push(children, child);
701
+ rb_ary_push(ret, children);
708
+iterElementDeclHandler(void *recv,
709
+ const XML_Char *name,
710
+ XML_Content *model)
714
+ GET_PARSER(recv, parser);
715
+ content = makeContentArray(parser, model);
716
+ rb_yield(rb_ary_new3(4, symELEMENT_DECL,
717
+ TO_(ENC_(rb_str_new2(name))),
719
+ if (parser->defaultCurrent) {
720
+ parser->defaultCurrent = 0;
721
+ XML_DefaultCurrent(parser->parser);
726
+iterAttlistDeclHandler (void *recv,
727
+ const XML_Char *elname,
728
+ const XML_Char *attname,
729
+ const XML_Char *att_type,
730
+ const XML_Char *dflt,
736
+ GET_PARSER(recv, parser);
737
+ valary = rb_ary_new3(4,
738
+ TO_(ENC_(rb_str_new2((char*)attname))),
739
+ TO_(ENC_(rb_str_new2((char*)att_type))),
740
+ (dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
741
+ (isrequired ? Qtrue : Qfalse));
742
+ rb_yield(rb_ary_new3(4, symATTLIST_DECL,
743
+ TO_(ENC_(rb_str_new2(elname))),
745
+ if (parser->defaultCurrent) {
746
+ parser->defaultCurrent = 0;
747
+ XML_DefaultCurrent(parser->parser);
752
+iterXmlDeclHandler (void *recv,
753
+ const XML_Char *version,
754
+ const XML_Char *encoding,
760
+ GET_PARSER(recv, parser);
761
+ valary = rb_ary_new3(3,
762
+ (version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
763
+ (encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
764
+ INT2FIX(standalone));
765
+ rb_yield(rb_ary_new3(4, symXML_DECL,
768
+ if (parser->defaultCurrent) {
769
+ parser->defaultCurrent = 0;
770
+ XML_DefaultCurrent(parser->parser);
775
+iterEntityDeclHandler (void *recv,
776
+ const XML_Char *entityName,
777
+ int is_parameter_entity,
778
+ const XML_Char *value,
780
+ const XML_Char *base,
781
+ const XML_Char *systemId,
782
+ const XML_Char *publicId,
783
+ const XML_Char *notationName)
788
+ GET_PARSER(recv, parser);
789
+ valary = rb_ary_new3(6,
790
+ (is_parameter_entity ? Qtrue : Qfalse),
791
+ TO_(ENC_(rb_str_new((char*)value, value_length))),
792
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
793
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
794
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
795
+ (notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
797
+ rb_yield(rb_ary_new3(4, symENTITY_DECL,
798
+ TO_(ENC_(rb_str_new2(entityName))),
800
+ if (parser->defaultCurrent) {
801
+ parser->defaultCurrent = 0;
802
+ XML_DefaultCurrent(parser->parser);
810
+iterExternalParsedEntityDeclHandler(void *recv,
811
+ const XML_Char *entityName,
812
+ const XML_Char *base,
813
+ const XML_Char *systemId,
814
+ const XML_Char *publicId)
819
+ GET_PARSER(recv, parser);
820
+ valary = rb_ary_new3(3, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
821
+ TO_(ENC_(rb_str_new2((char*)systemId))),
822
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
823
+ rb_yield(rb_ary_new3(4, symEXTERNAL_PARSED_ENTITY_DECL,
824
+ TO_(ENC_(rb_str_new2((char*)entityName))),
826
+ if (parser->defaultCurrent) {
827
+ parser->defaultCurrent = 0;
828
+ XML_DefaultCurrent(parser->parser);
833
+iterInternalParsedEntityDeclHandler(void *recv,
834
+ const XML_Char *entityName,
835
+ const XML_Char *replacementText,
836
+ int replacementTextLength)
839
+ GET_PARSER(recv, parser);
840
+ rb_yield(rb_ary_new3(4, symINTERNAL_PARSED_ENTITY_DECL,
841
+ TO_(ENC_(rb_str_new2((char*)entityName))),
842
+ TO_(ENC_(rb_str_new((char*)replacementText,
843
+ replacementTextLength))), recv));
844
+ if (parser->defaultCurrent) {
845
+ parser->defaultCurrent = 0;
846
+ XML_DefaultCurrent(parser->parser);
851
+#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
853
+iterSkippedEntityHandler(void *recv,
854
+ const XML_Char *entityName,
855
+ int is_parameter_entity)
858
+ GET_PARSER(recv, parser);
859
+ rb_yield(rb_ary_new3(4, symSKIPPED_ENTITY,
860
+ TO_(ENC_(rb_str_new2((char*)entityName))),
861
+ INT2FIX(is_parameter_entity), recv));
862
+ if (parser->defaultCurrent) {
863
+ parser->defaultCurrent = 0;
864
+ XML_DefaultCurrent(parser->parser);
871
+/* Event handlers for instance method */
873
+myStartElementHandler(void *recv,
874
+ const XML_Char *name, const XML_Char **atts)
879
+ GET_PARSER(recv, parser);
881
+ parser->lastAttrs = atts;
883
+ attrhash = rb_hash_new();
885
+ const char* key = *atts++;
886
+ const char* val = *atts++;
887
+ rb_hash_aset(attrhash,
888
+ FO_(TO_(ENC_(rb_str_new2((char*)key)))),
889
+ TO_(ENC_(rb_str_new2((char*)val))));
891
+ rb_funcall((VALUE)recv, id_startElementHandler, 2,
892
+ TO_(ENC_(rb_str_new2((char*)name))), attrhash);
896
+myEndElementHandler(void *recv,
897
+ const XML_Char *name)
900
+ GET_PARSER(recv, parser);
901
+ rb_funcall((VALUE)recv, id_endElementHandler, 1,
902
+ TO_(ENC_(rb_str_new2((char*)name))));
906
+myCharacterDataHandler(void *recv,
911
+ GET_PARSER(recv, parser);
912
+ rb_funcall((VALUE)recv, id_characterDataHandler, 1,
913
+ TO_(ENC_(rb_str_new((char*)s, len))));
917
+myProcessingInstructionHandler(void *recv,
918
+ const XML_Char *target,
919
+ const XML_Char *data)
922
+ GET_PARSER(recv, parser);
923
+ rb_funcall((VALUE)recv, id_processingInstructionHandler, 2,
924
+ TO_(ENC_(rb_str_new2((char*)target))),
925
+ TO_(ENC_(rb_str_new2((char*)data))));
929
+myDefaultHandler(void *recv,
934
+ GET_PARSER(recv, parser);
935
+ rb_funcall((VALUE)recv, id_defaultHandler, 1,
936
+ TO_(ENC_(rb_str_new((char*)s, len))));
941
+myDefaultExpandHandler(void *recv,
946
+ GET_PARSER(recv, parser);
947
+ rb_funcall((VALUE)recv, id_defaultExpandHandler, 1,
948
+ TO_(ENC_(rb_str_new((char*)s, len))));
953
+myUnparsedEntityDeclHandler(void *recv,
954
+ const XML_Char *entityName,
955
+ const XML_Char *base,
956
+ const XML_Char *systemId,
957
+ const XML_Char *publicId,
958
+ const XML_Char *notationName)
961
+ GET_PARSER(recv, parser);
962
+ rb_funcall((VALUE)recv, id_unparsedEntityDeclHandler, 5,
963
+ TO_(ENC_(rb_str_new2((char*)entityName))),
964
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
965
+ TO_(ENC_(rb_str_new2((char*)systemId))),
966
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
967
+ TO_(ENC_(rb_str_new2((char*)notationName))));
971
+myNotationDeclHandler(void *recv,
972
+ const XML_Char *notationName,
973
+ const XML_Char *base,
974
+ const XML_Char *systemId,
975
+ const XML_Char *publicId)
978
+ GET_PARSER(recv, parser);
979
+ rb_funcall((VALUE)recv, id_notationDeclHandler, 4,
980
+ TO_(ENC_(rb_str_new2((char*)notationName))),
981
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
982
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
983
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
987
+myExternalEntityRefHandler(XML_Parser xmlparser,
988
+ const XML_Char *context,
989
+ const XML_Char *base,
990
+ const XML_Char *systemId,
991
+ const XML_Char *publicId)
997
+ recv = (VALUE)XML_GetUserData(xmlparser);
998
+ GET_PARSER(recv, parser);
999
+ ret = rb_funcall(recv, id_externalEntityRefHandler, 4,
1000
+ (context ? TO_(ENC_(rb_str_new2((char*)context))): Qnil),
1001
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
1002
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
1003
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
1004
+ /* The error status in this handler should be returned
1005
+ by the exception. */
1011
+myCommentHandler(void *recv,
1012
+ const XML_Char *s)
1014
+ XMLParser* parser;
1015
+ GET_PARSER(recv, parser);
1016
+ rb_funcall((VALUE)recv, id_commentHandler, 1,
1017
+ TO_(ENC_(rb_str_new2((char*)s))));
1021
+myStartCdataSectionHandler(void *recv)
1023
+ XMLParser* parser;
1024
+ GET_PARSER(recv, parser);
1025
+ rb_funcall((VALUE)recv, id_startCdataSectionHandler, 0);
1029
+myEndCdataSectionHandler(void *recv)
1031
+ XMLParser* parser;
1032
+ GET_PARSER(recv, parser);
1033
+ rb_funcall((VALUE)recv, id_endCdataSectionHandler, 0);
1037
+myStartNamespaceDeclHandler(void *recv,
1038
+ const XML_Char *prefix,
1039
+ const XML_Char *uri)
1041
+ XMLParser* parser;
1042
+ GET_PARSER(recv, parser);
1043
+ rb_funcall((VALUE)recv, id_startNamespaceDeclHandler, 2,
1044
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
1045
+ (uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil));
1049
+myEndNamespaceDeclHandler(void *recv,
1050
+ const XML_Char *prefix)
1052
+ XMLParser* parser;
1053
+ GET_PARSER(recv, parser);
1054
+ rb_funcall((VALUE)recv, id_endNamespaceDeclHandler, 1,
1055
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil));
1059
+myNotStandaloneHandler(void *recv)
1061
+ XMLParser* parser;
1064
+ GET_PARSER(recv, parser);
1065
+ v = rb_funcall((VALUE)recv, id_notStandaloneHandler, 0);
1066
+ Check_Type(v, T_FIXNUM);
1067
+ return FIX2INT(v);
1071
+#ifdef HAVE_XML_SETPARAMENTITYPARSING
1073
+#ifdef HAVE_EXPAT_H
1074
+myStartDoctypeDeclHandler(void *recv,
1075
+ const XML_Char *doctypeName,
1076
+ const XML_Char *sysid,
1077
+ const XML_Char *pubid,
1078
+ int has_internal_subset)
1080
+myStartDoctypeDeclHandler(void *recv,
1081
+ const XML_Char *doctypeName)
1084
+ XMLParser* parser;
1085
+ GET_PARSER(recv, parser);
1086
+#ifdef HAVE_EXPAT_H
1087
+ rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
1088
+ TO_(ENC_(rb_str_new2((char*)doctypeName))),
1089
+ (sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
1090
+ (pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
1091
+ (has_internal_subset ? Qtrue : Qfalse));
1093
+ rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
1094
+ TO_(ENC_(rb_str_new2((char*)doctypeName))),
1095
+ Qnil, Qnil, Qfalse);
1100
+myEndDoctypeDeclHandler(void *recv)
1102
+ XMLParser* parser;
1103
+ GET_PARSER(recv, parser);
1104
+ rb_funcall((VALUE)recv, id_endDoctypeDeclHandler, 0);
1109
+#ifdef HAVE_EXPAT_H
1112
+myElementDeclHandler(void *recv,
1113
+ const XML_Char *name,
1114
+ XML_Content *model)
1116
+ XMLParser* parser;
1118
+ GET_PARSER(recv, parser);
1119
+ content = makeContentArray(parser, model);
1120
+ rb_funcall((VALUE)recv, id_elementDeclHandler, 2,
1121
+ TO_(ENC_(rb_str_new2(name))), content);
1125
+myAttlistDeclHandler (void *recv,
1126
+ const XML_Char *elname,
1127
+ const XML_Char *attname,
1128
+ const XML_Char *att_type,
1129
+ const XML_Char *dflt,
1132
+ XMLParser* parser;
1133
+ GET_PARSER(recv, parser);
1134
+ rb_funcall((VALUE)recv, id_attlistDeclHandler, 5,
1135
+ TO_(ENC_(rb_str_new2(elname))),
1136
+ TO_(ENC_(rb_str_new2((char*)attname))),
1137
+ TO_(ENC_(rb_str_new2((char*)att_type))),
1138
+ (dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
1139
+ (isrequired ? Qtrue : Qfalse));
1143
+myXmlDeclHandler (void *recv,
1144
+ const XML_Char *version,
1145
+ const XML_Char *encoding,
1148
+ XMLParser* parser;
1149
+ GET_PARSER(recv, parser);
1150
+ rb_funcall((VALUE)recv, id_xmlDeclHandler, 3,
1151
+ (version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
1152
+ (encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
1153
+ INT2FIX(standalone));
1157
+myEntityDeclHandler (void *recv,
1158
+ const XML_Char *entityName,
1159
+ int is_parameter_entity,
1160
+ const XML_Char *value,
1162
+ const XML_Char *base,
1163
+ const XML_Char *systemId,
1164
+ const XML_Char *publicId,
1165
+ const XML_Char *notationName)
1167
+ XMLParser* parser;
1168
+ GET_PARSER(recv, parser);
1169
+ rb_funcall((VALUE)recv, id_entityDeclHandler, 7,
1170
+ TO_(ENC_(rb_str_new2(entityName))),
1171
+ (is_parameter_entity ? Qtrue : Qfalse),
1172
+ TO_(ENC_(rb_str_new((char*)value, value_length))),
1173
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
1174
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
1175
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
1176
+ (notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
1184
+myExternalParsedEntityDeclHandler(void *recv,
1185
+ const XML_Char *entityName,
1186
+ const XML_Char *base,
1187
+ const XML_Char *systemId,
1188
+ const XML_Char *publicId)
1190
+ XMLParser* parser;
1191
+ GET_PARSER(recv, parser);
1192
+ rb_funcall((VALUE)recv, id_externalParsedEntityDeclHandler, 4,
1193
+ TO_(ENC_(rb_str_new2((char*)entityName))),
1194
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
1195
+ TO_(ENC_(rb_str_new2((char*)systemId))),
1196
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
1200
+myInternalParsedEntityDeclHandler(void *recv,
1201
+ const XML_Char *entityName,
1202
+ const XML_Char *replacementText,
1203
+ int replacementTextLength)
1205
+ XMLParser* parser;
1206
+ GET_PARSER(recv, parser);
1207
+ rb_funcall((VALUE)recv, id_internalParsedEntityDeclHandler, 2,
1208
+ TO_(ENC_(rb_str_new2((char*)entityName))),
1209
+ TO_(ENC_(rb_str_new((char*)replacementText,
1210
+ replacementTextLength))));
1216
+XMLEncoding_map(VALUE obj, VALUE i)
1222
+XMLEncoding_convert(VALUE obj, VALUE str)
1224
+ return INT2FIX('?');
1228
+myEncodingConv(void *data, const char *s)
1232
+ int slen = RSTRING_PTR(rb_ivar_get((VALUE)data,
1233
+ id_map))[*(unsigned char*)s];
1235
+ v = rb_funcall((VALUE)data, id_convert, 1, ENC_(rb_str_new((char*)s, -slen)));
1236
+ switch (TYPE(v)) {
1238
+ return FIX2INT(v);
1240
+ len = RSTRING_LEN(v);
1242
+ return (unsigned char)*RSTRING_PTR(v);
1244
+ else if (len >= 2) {
1245
+ return (unsigned char)*RSTRING_PTR(v) |
1246
+ (unsigned char)*(RSTRING_PTR(v) + 1) << 8;
1254
+iterUnknownEncodingHandler(void *recv,
1255
+ const XML_Char *name,
1256
+ XML_Encoding *info)
1258
+ XMLParser* parser;
1261
+ if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1264
+ GET_PARSER(recv, parser);
1265
+ ret = rb_yield(rb_ary_new3(4, symUNKNOWN_ENCODING,
1266
+ TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
1267
+ if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1269
+ ID mid = rb_intern("map");
1270
+ VALUE cmap = rb_str_new(NULL, 256);
1271
+ rb_ivar_set(ret, id_map, cmap);
1273
+ for (i = 0; i < 256; i++) {
1274
+ VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1275
+ RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
1277
+ /* protect object from GC */
1278
+ rb_ivar_set(recv, rb_intern("_encoding"), ret);
1279
+ info->data = (void*)ret;
1280
+ info->convert = myEncodingConv;
1288
+#ifdef XML_ENC_PATH
1290
+ * Encoding map functions come from XML::Parser Version 2.19
1292
+ * Copyright 1998 Larry Wall and Clark Cooper
1293
+ * All rights reserved.
1295
+ * This program is free software; you can redistribute it and/or
1296
+ * modify it under the same terms as Perl itself.
1299
+getEncinfo(char* data, int size)
1301
+ Encmap_Header* header = (Encmap_Header*)data;
1302
+ unsigned short prefixes_size;
1303
+ unsigned short bytemap_size;
1306
+ PrefixMap* prefixes;
1307
+ unsigned short *bytemap;
1309
+ if (size < sizeof(Encmap_Header) || ntohl(header->magic) != ENCMAP_MAGIC)
1311
+ prefixes_size = ntohs(header->pfsize);
1312
+ bytemap_size = ntohs(header->bmsize);
1313
+ if (size != (sizeof(Encmap_Header) +
1314
+ prefixes_size * sizeof(PrefixMap) +
1315
+ bytemap_size * sizeof(unsigned short)))
1317
+ if ((ret = (Encinfo*)malloc(sizeof(Encinfo))) == NULL) {
1320
+ ret->prefixes_size = prefixes_size;
1321
+ ret->bytemap_size = bytemap_size;
1322
+ for (i = 0; i < 256; i++)
1323
+ ret->firstmap[i] = ntohl(header->map[i]);
1324
+ prefixes = (PrefixMap*)(data + sizeof(Encmap_Header));
1325
+ bytemap = (unsigned short*)(data + sizeof(Encmap_Header)
1326
+ + sizeof(PrefixMap)*prefixes_size);
1327
+ if ((ret->prefixes =
1328
+ (PrefixMap*)malloc(sizeof(PrefixMap)*prefixes_size)) == NULL) {
1332
+ if ((ret->bytemap =
1333
+ (unsigned short*)malloc(sizeof(unsigned short)*bytemap_size)) == NULL) {
1334
+ free(ret->prefixes);
1338
+ for (i = 0; i < prefixes_size; i++, prefixes++) {
1339
+ ret->prefixes[i].min = prefixes->min;
1340
+ ret->prefixes[i].len = prefixes->len;
1341
+ ret->prefixes[i].bmap_start = ntohs(prefixes->bmap_start);
1342
+ memcpy(ret->prefixes[i].ispfx, prefixes->ispfx,
1343
+ sizeof(prefixes->ispfx) + sizeof(prefixes->ischar));
1345
+ for (i = 0; i < bytemap_size; i++)
1346
+ ret->bytemap[i] = ntohs(bytemap[i]);
1352
+convertEncoding(Encinfo* enc, const char* seq)
1354
+ PrefixMap* curpfx;
1358
+ for (count = 0; count < 4; count++) {
1359
+ unsigned char byte = (unsigned char)seq[count];
1360
+ unsigned char bndx;
1361
+ unsigned char bmsk;
1364
+ curpfx = &enc->prefixes[index];
1365
+ offset = ((int)byte) - curpfx->min;
1368
+ if (offset >= curpfx->len && curpfx->len != 0)
1372
+ bmsk = 1 << (byte & 0x7);
1374
+ if (curpfx->ispfx[bndx] & bmsk) {
1375
+ index = enc->bytemap[curpfx->bmap_start + offset];
1377
+ else if (curpfx->ischar[bndx] & bmsk) {
1378
+ return enc->bytemap[curpfx->bmap_start + offset];
1388
+releaseEncoding(Encinfo* enc)
1391
+ if (enc->prefixes)
1392
+ free(enc->prefixes);
1394
+ free(enc->bytemap);
1400
+findEncoding(const char* encname)
1407
+ char file[PATH_MAX] = "\0";
1411
+ const char sepchar = '\\';
1413
+ const char sepchar = '/';
1415
+ const char* const encext = ".enc";
1418
+ /* make map file path */
1419
+ if (XML_ENC_PATH != NULL) {
1420
+ strncpy(file, XML_ENC_PATH, PATH_MAX - 1);
1421
+ file[PATH_MAX - 1] = '\0';
1423
+ len = strlen(file);
1424
+ if (len > 0 && len < PATH_MAX - 1 && file[len - 1] != sepchar)
1425
+ file[len++] = sepchar;
1426
+ for (p = encname; *p && len < PATH_MAX - 1; p++, len++) {
1427
+ file[len] = tolower(*p);
1430
+ strncat(file, encext, PATH_MAX - len -1);
1432
+ if ((fp = fopen(file, "rb")) == NULL) {
1436
+ /* get file length */
1437
+ fstat(fileno(fp), &st);
1438
+ size = st.st_size;
1440
+ if ((buf = (char*)malloc(size)) == NULL) {
1445
+ fread(buf, 1, size, fp);
1447
+ enc = getEncinfo(buf, size);
1455
+myUnknownEncodingHandler(void *recv,
1456
+ const XML_Char *name,
1457
+ XML_Encoding *info)
1459
+ XMLParser* parser;
1462
+ GET_PARSER(recv, parser);
1463
+ parser->detectedEncoding = name;
1465
+ if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1466
+#ifndef XML_ENC_PATH
1472
+ if ((enc = findEncoding(name)) != NULL) {
1473
+ memcpy(info->map, enc->firstmap, sizeof(int)*256);
1475
+ info->convert = (int(*)(void*,const char*))convertEncoding;
1476
+ info->release = (void(*)(void*))releaseEncoding;
1484
+ ret = rb_funcall((VALUE)recv, id_unknownEncoding, 1,
1485
+ TO_(ENC_(rb_str_new2((char*)name))));
1486
+ if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1488
+ ID mid = rb_intern("map");
1489
+ VALUE cmap = rb_str_new(NULL, 256);
1490
+ rb_ivar_set(ret, id_map, cmap);
1492
+ if (OBJ_TAINTED(ret))
1493
+ taintParser(parser);
1496
+ for (i = 0; i < 256; i++) {
1497
+ VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1498
+ RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
1500
+ /* protect object from GC */
1501
+ rb_ivar_set((VALUE)recv, rb_intern("_encoding"), ret);
1502
+ info->data = (void*)ret;
1503
+ info->convert = myEncodingConv;
1511
+#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1513
+mySkippedEntityHandler(void *recv,
1514
+ const XML_Char *entityName,
1515
+ int is_parameter_entity)
1517
+ XMLParser* parser;
1518
+ GET_PARSER(recv, parser);
1519
+ rb_funcall((VALUE)recv, id_skippedEntityHandler, 2,
1520
+ TO_(ENC_(rb_str_new2((char*)entityName))),
1521
+ INT2FIX(is_parameter_entity));
1528
+XMLParser_new(int argc, VALUE* argv, VALUE klass)
1530
+ XMLParser* parser;
1536
+ char* encoding = NULL;
1538
+ char* nssep = NULL;
1540
+ char* context = NULL;
1541
+ XMLParser* rootparser = NULL;
1542
+ VALUE parent = Qnil;
1544
+ count = rb_scan_args(argc, argv, "03", &arg1, &arg2, &arg3);
1546
+ /* new(encoding) */
1547
+ if (TYPE(arg1) != T_NIL) {
1548
+ Check_Type(arg1, T_STRING); /* encoding */
1549
+ encoding = RSTRING_PTR(arg1);
1552
+ else if (count == 2) {
1553
+ /* new(encoding, nschar) */
1554
+ /* new(parser, context) */
1556
+ if (TYPE(arg1) != T_DATA) {
1557
+ if (TYPE(arg1) != T_NIL) {
1558
+ Check_Type(arg1, T_STRING); /* encoding */
1559
+ encoding = RSTRING_PTR(arg1);
1561
+ Check_Type(arg2, T_STRING); /* nschar */
1562
+ nssep = RSTRING_PTR(arg2);
1566
+ Check_Type(arg1, T_DATA); /* parser */
1567
+ GET_PARSER(arg1, rootparser);
1568
+ if (!NIL_P(arg2)) {
1569
+ Check_Type(arg2, T_STRING); /* context */
1570
+ context = RSTRING_PTR(arg2);
1577
+ else if (count == 3) {
1578
+ /* new(parser, context, encoding) */
1579
+ Check_Type(arg1, T_DATA); /* parser */
1580
+ GET_PARSER(arg1, rootparser);
1581
+ if (!NIL_P(arg2)) {
1582
+ Check_Type(arg2, T_STRING); /* context */
1583
+ context = RSTRING_PTR(arg2);
1585
+ Check_Type(arg3, T_STRING); /* encoding */
1586
+ encoding = RSTRING_PTR(arg3);
1590
+ /* create object */
1591
+ obj = Data_Make_Struct(klass, XMLParser,
1592
+ XMLParser_mark, XMLParser_free, parser);
1593
+ /* create parser */
1594
+ if (rootparser == NULL) {
1596
+ if (nssep == NULL)
1597
+ parser->parser = XML_ParserCreate(encoding);
1599
+ parser->parser = XML_ParserCreateNS(encoding, nssep[0]);
1601
+ parser->parser = XML_ParserCreate(encoding);
1603
+ parser->tainted = 0;
1604
+ parser->context = NULL;
1607
+ parser->parser = XML_ExternalEntityParserCreate(rootparser->parser,
1608
+ context, encoding);
1609
+ /* clear all inherited handlers,
1610
+ because handlers should be set in "parse" method */
1611
+ XML_SetElementHandler(parser->parser, NULL, NULL);
1612
+ XML_SetCharacterDataHandler(parser->parser, NULL);
1613
+ XML_SetProcessingInstructionHandler(parser->parser, NULL);
1614
+ XML_SetDefaultHandler(parser->parser, NULL);
1615
+ XML_SetUnparsedEntityDeclHandler(parser->parser, NULL);
1616
+ XML_SetNotationDeclHandler(parser->parser, NULL);
1617
+ XML_SetExternalEntityRefHandler(parser->parser, NULL);
1619
+ XML_SetCommentHandler(parser->parser, NULL);
1620
+ XML_SetCdataSectionHandler(parser->parser, NULL, NULL);
1621
+ XML_SetNamespaceDeclHandler(parser->parser, NULL, NULL);
1622
+ XML_SetNotStandaloneHandler(parser->parser, NULL);
1624
+#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1625
+ XML_SetDoctypeDeclHandler(parser->parser, NULL, NULL);
1627
+#ifdef HAVE_EXPAT_H
1628
+ XML_SetElementDeclHandler(parser->parser, NULL);
1629
+ XML_SetAttlistDeclHandler(parser->parser, NULL);
1630
+ XML_SetXmlDeclHandler(parser->parser, NULL);
1631
+ XML_SetEntityDeclHandler(parser->parser, NULL);
1634
+ XML_SetExternalParsedEntityDeclHandler(parser->parser, NULL);
1635
+ XML_SetInternalParsedEntityDeclHandler(parser->parser, NULL);
1637
+#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1638
+ XML_SetSkippedEntityHandler(parser->parser, NULL);
1640
+ if (rootparser->tainted)
1641
+ parser->tainted |= 1;
1642
+ parser->context = context;
1644
+ if (!parser->parser)
1645
+ rb_raise(eXMLParserError, "cannot create parser");
1647
+ /* setting up internal data */
1648
+ XML_SetUserData(parser->parser, (void*)obj);
1649
+ parser->iterator = 0;
1650
+ parser->defaultCurrent = 0;
1652
+ parser->lastAttrs = NULL;
1654
+ parser->parent = parent;
1655
+ parser->detectedEncoding = NULL;
1657
+ rb_obj_call_init(obj, argc, argv);
1663
+XMLParser_initialize(VALUE obj)
1668
+#ifdef HAVE_XML_PARSERRESET
1670
+XMLParser_reset(int argc, VALUE* argv, VALUE obj)
1672
+ XMLParser* parser;
1673
+ VALUE vencoding = Qnil;
1674
+ char* encoding = NULL;
1677
+ count = rb_scan_args(argc, argv, "01", &vencoding);
1679
+ GET_PARSER(obj, parser);
1680
+ if (count > 0 && TYPE(vencoding) != T_NIL) {
1681
+ Check_Type(vencoding, T_STRING);
1682
+ encoding = RSTRING_PTR(vencoding);
1684
+ XML_ParserReset(parser->parser, encoding);
1685
+ /* setting up internal data */
1686
+ XML_SetUserData(parser->parser, (void*)obj);
1687
+ parser->iterator = 0;
1688
+ parser->defaultCurrent = 0;
1690
+ parser->lastAttrs = NULL;
1692
+ parser->tainted = 0;
1693
+ parser->detectedEncoding = NULL;
1700
+setup_evnet_handlers(XMLParser* parser, VALUE obj) {
1701
+ XML_StartElementHandler start = NULL;
1702
+ XML_EndElementHandler end = NULL;
1704
+ XML_StartCdataSectionHandler startC = NULL;
1705
+ XML_EndCdataSectionHandler endC = NULL;
1706
+ XML_StartNamespaceDeclHandler startNS = NULL;
1707
+ XML_EndNamespaceDeclHandler endNS = NULL;
1709
+#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1710
+ XML_StartDoctypeDeclHandler startDoctype = NULL;
1711
+ XML_EndDoctypeDeclHandler endDoctype = NULL;
1714
+ /* Call as iterator */
1715
+ if (parser->iterator) {
1716
+ XML_SetElementHandler(parser->parser,
1717
+ iterStartElementHandler, iterEndElementHandler);
1718
+ XML_SetCharacterDataHandler(parser->parser,
1719
+ iterCharacterDataHandler);
1720
+ XML_SetProcessingInstructionHandler(parser->parser,
1721
+ iterProcessingInstructionHandler);
1722
+ /* check dummy default handler */
1724
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0))
1725
+ XML_SetDefaultHandlerExpand(parser->parser, iterDefaultHandler);
1728
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0))
1729
+ XML_SetDefaultHandler(parser->parser, iterDefaultHandler);
1731
+ if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0))
1732
+ XML_SetUnparsedEntityDeclHandler(parser->parser,
1733
+ iterUnparsedEntityDeclHandler);
1734
+ if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0))
1735
+ XML_SetNotationDeclHandler(parser->parser,
1736
+ iterNotationDeclHandler);
1737
+ if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0))
1738
+ XML_SetExternalEntityRefHandler(parser->parser,
1739
+ iterExternalEntityRefHandler);
1741
+ if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0))
1742
+ XML_SetCommentHandler(parser->parser, iterCommentHandler);
1744
+ if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0))
1745
+ startC = iterStartCdataSectionHandler;
1746
+ if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0))
1747
+ endC = iterEndCdataSectionHandler;
1748
+ if (startC || endC)
1749
+ XML_SetCdataSectionHandler(parser->parser, startC, endC);
1751
+ if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0))
1752
+ startNS = iterStartNamespaceDeclHandler;
1753
+ if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0))
1754
+ endNS = iterEndNamespaceDeclHandler;
1755
+ if (startNS || endNS)
1756
+ XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS);
1757
+ if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0))
1758
+ XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler);
1760
+#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1761
+ if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0))
1762
+ startDoctype = iterStartDoctypeDeclHandler;
1763
+ if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0))
1764
+ endDoctype = iterEndDoctypeDeclHandler;
1765
+ if (startDoctype || endDoctype)
1766
+ XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype);
1768
+#ifdef HAVE_EXPAT_H
1769
+ if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0))
1770
+ XML_SetElementDeclHandler(parser->parser, iterElementDeclHandler);
1771
+ if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0))
1772
+ XML_SetAttlistDeclHandler(parser->parser, iterAttlistDeclHandler);
1773
+ if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0))
1774
+ XML_SetXmlDeclHandler(parser->parser, iterXmlDeclHandler);
1775
+ if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0))
1776
+ XML_SetEntityDeclHandler(parser->parser, iterEntityDeclHandler);
1779
+ if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0))
1780
+ XML_SetExternalParsedEntityDeclHandler(parser->parser,
1781
+ iterExternalParsedEntityDeclHandler);
1782
+ if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0))
1783
+ XML_SetInternalParsedEntityDeclHandler(parser->parser,
1784
+ iterInternalParsedEntityDeclHandler);
1786
+ /* Call non-iterator version of UnknownEncoding handler,
1787
+ because the procedure block often returns an unexpected value. */
1788
+ XML_SetUnknownEncodingHandler(parser->parser,
1789
+ myUnknownEncodingHandler,
1791
+#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1792
+ if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0))
1793
+ XML_SetSkippedEntityHandler(parser->parser, iterSkippedEntityHandler);
1796
+ /* Call as non-iterator */
1798
+ if (rb_method_boundp(CLASS_OF(obj), id_startElementHandler, 0))
1799
+ start = myStartElementHandler;
1800
+ if (rb_method_boundp(CLASS_OF(obj), id_endElementHandler, 0))
1801
+ end = myEndElementHandler;
1803
+ XML_SetElementHandler(parser->parser, start, end);
1804
+ if (rb_method_boundp(CLASS_OF(obj), id_characterDataHandler, 0))
1805
+ XML_SetCharacterDataHandler(parser->parser,
1806
+ myCharacterDataHandler);
1807
+ if (rb_method_boundp(CLASS_OF(obj),
1808
+ id_processingInstructionHandler, 0))
1809
+ XML_SetProcessingInstructionHandler(parser->parser,
1810
+ myProcessingInstructionHandler);
1812
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0))
1813
+ XML_SetDefaultHandlerExpand(parser->parser, myDefaultExpandHandler);
1816
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0)) {
1817
+ XML_SetDefaultHandler(parser->parser, myDefaultHandler);
1819
+ if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0))
1820
+ XML_SetUnparsedEntityDeclHandler(parser->parser,
1821
+ myUnparsedEntityDeclHandler);
1822
+ if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0))
1823
+ XML_SetNotationDeclHandler(parser->parser,
1824
+ myNotationDeclHandler);
1825
+ if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0))
1826
+ XML_SetExternalEntityRefHandler(parser->parser,
1827
+ myExternalEntityRefHandler);
1829
+ if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0))
1830
+ XML_SetCommentHandler(parser->parser, myCommentHandler);
1832
+ if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0))
1833
+ startC = myStartCdataSectionHandler;
1834
+ if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0))
1835
+ endC = myEndCdataSectionHandler;
1836
+ if (startC || endC)
1837
+ XML_SetCdataSectionHandler(parser->parser, startC, endC);
1839
+ if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0))
1840
+ startNS = myStartNamespaceDeclHandler;
1841
+ if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0))
1842
+ endNS = myEndNamespaceDeclHandler;
1843
+ if (startNS || endNS)
1844
+ XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS);
1845
+ if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0))
1846
+ XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler);
1848
+#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1849
+ if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0))
1850
+ startDoctype = myStartDoctypeDeclHandler;
1851
+ if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0))
1852
+ endDoctype = myEndDoctypeDeclHandler;
1853
+ if (startDoctype || endDoctype)
1854
+ XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype);
1856
+#ifdef HAVE_EXPAT_H
1857
+ if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0))
1858
+ XML_SetElementDeclHandler(parser->parser, myElementDeclHandler);
1859
+ if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0))
1860
+ XML_SetAttlistDeclHandler(parser->parser, myAttlistDeclHandler);
1861
+ if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0))
1862
+ XML_SetXmlDeclHandler(parser->parser, myXmlDeclHandler);
1863
+ if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0))
1864
+ XML_SetEntityDeclHandler(parser->parser, myEntityDeclHandler);
1867
+ if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0))
1868
+ XML_SetExternalParsedEntityDeclHandler(parser->parser,
1869
+ myExternalParsedEntityDeclHandler);
1870
+ if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0))
1871
+ XML_SetInternalParsedEntityDeclHandler(parser->parser,
1872
+ myInternalParsedEntityDeclHandler);
1874
+ XML_SetUnknownEncodingHandler(parser->parser,
1875
+ myUnknownEncodingHandler,
1877
+#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1878
+ if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0))
1879
+ XML_SetSkippedEntityHandler(parser->parser, mySkippedEntityHandler);
1887
+XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1889
+ XMLParser* parser;
1895
+ int fromStream = 0;
1896
+ ID mid = rb_intern("gets");
1897
+ ID linebuf = rb_intern("_linebuf");
1899
+ count = rb_scan_args(argc, argv, "02", &str, &isFinal);
1900
+ /* If "str" has a public "gets" method, it will be considered a *stream* */
1901
+ if (!rb_obj_is_kind_of(str, rb_cString) &&
1902
+ rb_method_boundp(CLASS_OF(str), mid, 1)) {
1905
+ else if (!NIL_P(str)) {
1906
+ Check_Type(str, T_STRING);
1909
+ if (isFinal == Qtrue)
1911
+ else if (isFinal == Qfalse)
1914
+ rb_raise(rb_eTypeError, "not valid value");
1917
+ GET_PARSER(obj, parser);
1919
+ parser->iterator = rb_block_given_p();
1921
+ /* Setup event handlers */
1922
+ setup_evnet_handlers(parser, obj);
1924
+ /* Parse from stream (probably slightly slow) */
1928
+ if (OBJ_TAINTED(str))
1929
+ taintParser(parser);
1931
+ buf = rb_funcall(str, mid, 0);
1932
+ if (!NIL_P(buf)) {
1933
+ Check_Type(buf, T_STRING);
1934
+ if (OBJ_TAINTED(buf))
1935
+ taintParser(parser);
1936
+ rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
1937
+ ret = XML_Parse(parser->parser,
1938
+ RSTRING_PTR(buf), RSTRING_LEN(buf), 0);
1941
+ ret = XML_Parse(parser->parser, NULL, 0, 1);
1944
+ int err = XML_GetErrorCode(parser->parser);
1945
+ const char* errStr = XML_ErrorString(err);
1946
+ rb_raise(eXMLParserError, (char*)errStr);
1948
+ } while (!NIL_P(buf));
1952
+ /* Parse string */
1953
+ if (!NIL_P(str)) {
1954
+#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
1957
+ if (OBJ_TAINTED(str))
1958
+ taintParser(parser);
1959
+ ret = XML_Parse(parser->parser,
1960
+ RSTRING_PTR(str), RSTRING_LEN(str), final);
1961
+#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
1962
+ /* Ruby 1.9.1 Encoding conversion */
1963
+ err = XML_GetErrorCode(parser->parser);
1964
+ if (final && err == XML_ERROR_UNKNOWN_ENCODING) {
1966
+ volatile VALUE encobj;
1967
+ volatile VALUE ustr;
1968
+ enc = rb_enc_find(parser->detectedEncoding);
1969
+ if ((int)ENC_TO_ENCINDEX(enc) != rb_ascii8bit_encindex()) {
1970
+ rb_enc_associate(str, enc);
1971
+ encobj = rb_enc_from_encoding(enc_xml);
1972
+ /* rb_str_encode may raise an exception */
1973
+ ustr = rb_str_encode(str, encobj, 0, Qnil);
1974
+ if (!NIL_P(ustr)) {
1975
+ XML_ParserReset(parser->parser, "utf-8");
1976
+ XML_SetUserData(parser->parser, (void*)obj);
1977
+ parser->defaultCurrent = 0;
1979
+ parser->lastAttrs = NULL;
1981
+ parser->detectedEncoding = NULL;
1982
+ setup_evnet_handlers(parser, obj);
1983
+ ret = XML_Parse(parser->parser,
1984
+ RSTRING_PTR(ustr), RSTRING_LEN(ustr), final);
1991
+ ret = XML_Parse(parser->parser, NULL, 0, final);
1993
+ int err = XML_GetErrorCode(parser->parser);
1994
+ const char* errStr = XML_ErrorString(err);
1995
+ rb_raise(eXMLParserError, (char*)errStr);
2003
+XMLParser_done(VALUE obj)
2005
+ XMLParser* parser;
2007
+ GET_PARSER(obj, parser);
2008
+ if (parser->parser) {
2009
+ XML_ParserFree(parser->parser);
2010
+ parser->parser = NULL;
2015
+/* defaultCurrent method */
2017
+XMLParser_defaultCurrent(VALUE obj)
2019
+ XMLParser* parser;
2021
+ GET_PARSER(obj, parser);
2022
+ if (!(parser->iterator)) {
2023
+ XML_DefaultCurrent(parser->parser);
2026
+ parser->defaultCurrent = 1;
2033
+XMLParser_getCurrentLineNumber(VALUE obj)
2035
+ XMLParser* parser;
2038
+ GET_PARSER(obj, parser);
2039
+ line = XML_GetCurrentLineNumber(parser->parser);
2041
+ return INT2FIX(line);
2044
+/* column method */
2046
+XMLParser_getCurrentColumnNumber(VALUE obj)
2048
+ XMLParser* parser;
2051
+ GET_PARSER(obj, parser);
2052
+ column = XML_GetCurrentColumnNumber(parser->parser);
2054
+ return INT2FIX(column);
2057
+/* byte index method */
2059
+XMLParser_getCurrentByteIndex(VALUE obj)
2061
+ XMLParser* parser;
2064
+ GET_PARSER(obj, parser);
2065
+ pos = XML_GetCurrentByteIndex(parser->parser);
2067
+ return INT2FIX(pos);
2072
+XMLParser_setBase(VALUE obj, VALUE base)
2074
+ XMLParser* parser;
2077
+ Check_Type(base, T_STRING);
2078
+ GET_PARSER(obj, parser);
2079
+ if (OBJ_TAINTED(base))
2080
+ taintParser(parser);
2081
+ ret = XML_SetBase(parser->parser, RSTRING_PTR(base));
2083
+ return INT2FIX(ret);
2088
+XMLParser_getBase(VALUE obj)
2090
+ XMLParser* parser;
2091
+ const XML_Char* ret;
2093
+ GET_PARSER(obj, parser);
2094
+ ret = XML_GetBase(parser->parser);
2098
+ return TO_(ENC_(rb_str_new2((char*)ret)));
2104
+XMLParser_getSpecifiedAttributes(VALUE obj)
2106
+ XMLParser* parser;
2108
+ const XML_Char** atts;
2111
+ GET_PARSER(obj, parser);
2112
+ atts = parser->lastAttrs;
2115
+ count = XML_GetSpecifiedAttributeCount(parser->parser)/2;
2116
+ attrhash = rb_hash_new();
2118
+ const char* key = *atts++;
2120
+ rb_hash_aset(attrhash, FO_(TO_(ENC_(rb_str_new2((char*)key)))),
2121
+ (count-- > 0) ? Qtrue: Qfalse);
2128
+XMLParser_getSpecifiedAttributes(VALUE obj)
2130
+ XMLParser* parser;
2132
+ const XML_Char** atts;
2135
+ GET_PARSER(obj, parser);
2136
+ atts = parser->lastAttrs;
2139
+ count = XML_GetSpecifiedAttributeCount(parser->parser)/2;
2140
+ attrarray = rb_ary_new2(count);
2141
+ for (i = 0; i < count; i++, atts+=2) {
2142
+ const char* key = *atts;
2143
+ rb_ary_push(attrarray, TO_(ENC_(rb_str_new2((char*)key))));
2151
+XMLParser_getCurrentByteCount(VALUE obj)
2153
+ XMLParser* parser;
2155
+ GET_PARSER(obj, parser);
2156
+ return INT2FIX(XML_GetCurrentByteCount(parser->parser));
2162
+XMLParser_setParamEntityParsing(VALUE obj, VALUE parsing)
2164
+ XMLParser* parser;
2167
+ Check_Type(parsing, T_FIXNUM);
2168
+ GET_PARSER(obj, parser);
2169
+ ret = XML_SetParamEntityParsing(parser->parser, FIX2INT(parsing));
2171
+ return INT2FIX(ret);
2176
+XMLParser_s_expatVersion(VALUE obj)
2178
+#if defined(HAVE_EXPAT_H)
2179
+ return ENC_(rb_str_new2(XML_ExpatVersion()));
2180
+#elif defined(EXPAT_1_2)
2181
+ return ENC_(rb_str_new2("1.2"));
2182
+#elif defined(NEW_EXPAT)
2183
+ return ENC_(rb_str_new2("1.1"));
2185
+ return ENC_(rb_str_new2("1.0"));
2189
+#ifdef HAVE_EXPAT_H
2191
+XMLParser_setReturnNSTriplet(VALUE obj, VALUE do_nst)
2193
+ XMLParser* parser;
2196
+ GET_PARSER(obj, parser);
2197
+ switch (TYPE(do_nst)) {
2205
+ nst = FIX2INT(do_nst);
2208
+ rb_raise(rb_eTypeError, "not valid value");
2210
+ XML_SetReturnNSTriplet(parser->parser, nst);
2217
+XMLParser_getInputContext(VALUE obj)
2219
+ XMLParser* parser;
2220
+ const char* buffer;
2225
+ GET_PARSER(obj, parser);
2226
+ buffer = XML_GetInputContext(parser->parser,
2229
+ if (buffer && size > 0) {
2230
+ ret = rb_ary_new3(2,
2231
+ TO_(ENC_(rb_str_new(buffer, size))),
2240
+XMLParser_getIdAttrribute(VALUE obj)
2242
+ XMLParser* parser;
2244
+ const XML_Char** atts;
2246
+ GET_PARSER(obj, parser);
2247
+ atts = parser->lastAttrs;
2250
+ idattr = XML_GetIdAttributeIndex(parser->parser);
2253
+ return TO_(ENC_(rb_str_new2((char*)atts[idattr])));
2257
+#ifdef HAVE_XML_USEFOREIGNDTD
2259
+XMLParser_useForeignDTD(VALUE obj, VALUE useDTD)
2261
+ XMLParser* parser;
2265
+ GET_PARSER(obj, parser);
2266
+ switch (TYPE(useDTD)) {
2274
+ dtd = FIX2INT(useDTD);
2277
+ rb_raise(rb_eTypeError, "not valid value");
2279
+ ret = XML_UseForeignDTD(parser->parser, dtd);
2281
+ return INT2FIX(ret);
2285
+#ifdef HAVE_XML_GETFEATURELIST
2287
+XMLParser_s_getFeatureList(VALUE obj)
2289
+ const XML_Feature* list;
2290
+ VALUE ret = rb_hash_new();
2292
+ list = XML_GetFeatureList();
2293
+ while (list && list->feature) {
2294
+ rb_hash_aset(ret, FO_(ENC_(rb_str_new2(list->name))), INT2NUM(list->value));
2307
+#ifdef HAVE_RUBY_ENCODING_H
2308
+ enc_xml = rb_utf8_encoding();
2311
+ eXMLParserError = rb_define_class("XMLParserError", rb_eStandardError);
2312
+ cXMLParser = rb_define_class("XMLParser", rb_cObject);
2313
+ cXMLEncoding = rb_define_class("XMLEncoding", rb_cObject);
2315
+ /* Class name aliases */
2316
+ if (rb_const_defined(rb_cObject, rb_intern("XML")) == Qtrue)
2317
+ mXML = rb_const_get(rb_cObject, rb_intern("XML"));
2319
+ mXML = rb_define_module("XML");
2320
+ rb_define_const(mXML, "ParserError", eXMLParserError);
2321
+ rb_define_const(cXMLParser, "Error", eXMLParserError);
2322
+ rb_define_const(mXML, "Parser", cXMLParser);
2323
+ rb_define_const(mXML, "Encoding", cXMLEncoding);
2325
+ rb_define_singleton_method(cXMLParser, "new", XMLParser_new, -1);
2326
+ rb_define_singleton_method(cXMLParser, "expatVersion",
2327
+ XMLParser_s_expatVersion, 0);
2328
+ rb_define_method(cXMLParser, "initialize", XMLParser_initialize, -1);
2329
+ rb_define_method(cXMLParser, "parse", XMLParser_parse, -1);
2330
+ rb_define_method(cXMLParser, "done", XMLParser_done, 0);
2331
+ rb_define_method(cXMLParser, "defaultCurrent", XMLParser_defaultCurrent, 0);
2332
+ rb_define_method(cXMLParser, "line", XMLParser_getCurrentLineNumber, 0);
2333
+ rb_define_method(cXMLParser, "column", XMLParser_getCurrentColumnNumber, 0);
2334
+ rb_define_method(cXMLParser, "byteIndex", XMLParser_getCurrentByteIndex, 0);
2335
+ rb_define_method(cXMLParser, "setBase", XMLParser_setBase, 1);
2336
+ rb_define_method(cXMLParser, "getBase", XMLParser_getBase, 0);
2338
+ rb_define_method(cXMLParser, "getSpecifiedAttributes",
2339
+ XMLParser_getSpecifiedAttributes, 0);
2340
+ rb_define_method(cXMLParser, "byteCount", XMLParser_getCurrentByteCount, 0);
2343
+ rb_define_method(cXMLParser, "setParamEntityParsing",
2344
+ XMLParser_setParamEntityParsing, 1);
2346
+#ifdef HAVE_EXPAT_H
2347
+ rb_define_method(cXMLParser, "setReturnNSTriplet",
2348
+ XMLParser_setReturnNSTriplet, 1);
2349
+ rb_define_method(cXMLParser, "getInputContext",
2350
+ XMLParser_getInputContext, 0);
2351
+ rb_define_method(cXMLParser, "getIdAttribute",
2352
+ XMLParser_getIdAttrribute, 0);
2355
+#ifdef HAVE_XML_PARSERRESET
2356
+ rb_define_method(cXMLParser, "reset", XMLParser_reset, -1);
2359
+ rb_define_method(cXMLEncoding, "map", XMLEncoding_map, 1);
2360
+ rb_define_method(cXMLEncoding, "convert", XMLEncoding_convert, 1);
2362
+#ifdef HAVE_XML_USEFOREIGNDTD
2363
+ rb_define_method(cXMLParser, "useForeignDTD",
2364
+ XMLParser_useForeignDTD, 1);
2366
+#ifdef HAVE_XML_GETFEATURELIST
2367
+ rb_define_singleton_method(cXMLParser, "getFeatureList",
2368
+ XMLParser_s_getFeatureList, 0);
2371
+#define DEFINE_EVENT_CODE(klass, name) \
2372
+ rb_define_const(klass, #name, sym##name = ID2SYM(rb_intern(#name)))
2374
+ DEFINE_EVENT_CODE(cXMLParser, START_ELEM);
2375
+ DEFINE_EVENT_CODE(cXMLParser, END_ELEM);
2376
+ DEFINE_EVENT_CODE(cXMLParser, CDATA);
2377
+ DEFINE_EVENT_CODE(cXMLParser, PI);
2378
+ DEFINE_EVENT_CODE(cXMLParser, DEFAULT);
2379
+ DEFINE_EVENT_CODE(cXMLParser, UNPARSED_ENTITY_DECL);
2380
+ DEFINE_EVENT_CODE(cXMLParser, NOTATION_DECL);
2381
+ DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_ENTITY_REF);
2383
+ DEFINE_EVENT_CODE(cXMLParser, COMMENT);
2384
+ DEFINE_EVENT_CODE(cXMLParser, START_CDATA);
2385
+ DEFINE_EVENT_CODE(cXMLParser, END_CDATA);
2386
+ DEFINE_EVENT_CODE(cXMLParser, START_NAMESPACE_DECL);
2387
+ DEFINE_EVENT_CODE(cXMLParser, END_NAMESPACE_DECL);
2389
+#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
2390
+ DEFINE_EVENT_CODE(cXMLParser, SKIPPED_ENTITY);
2393
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_NEVER",
2394
+ XML_PARAM_ENTITY_PARSING_NEVER);
2395
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_UNLESS_STANDALONE",
2396
+ XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2397
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_ALWAYS",
2398
+ XML_PARAM_ENTITY_PARSING_ALWAYS);
2400
+#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
2401
+ DEFINE_EVENT_CODE(cXMLParser, START_DOCTYPE_DECL);
2402
+ DEFINE_EVENT_CODE(cXMLParser, END_DOCTYPE_DECL);
2404
+#ifdef HAVE_EXPAT_H
2405
+ DEFINE_EVENT_CODE(cXMLParser, ELEMENT_DECL);
2406
+ DEFINE_EVENT_CODE(cXMLParser, ATTLIST_DECL);
2407
+ DEFINE_EVENT_CODE(cXMLParser, XML_DECL);
2408
+ DEFINE_EVENT_CODE(cXMLParser, ENTITY_DECL);
2411
+ DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_PARSED_ENTITY_DECL);
2412
+ DEFINE_EVENT_CODE(cXMLParser, INTERNAL_PARSED_ENTITY_DECL);
2415
+ DEFINE_EVENT_CODE(cXMLParser, UNKNOWN_ENCODING);
2418
+ id_map = rb_intern("_map");
2419
+ id_startElementHandler = rb_intern("startElement");
2420
+ id_endElementHandler = rb_intern("endElement");
2421
+ id_characterDataHandler = rb_intern("character");
2422
+ id_processingInstructionHandler = rb_intern("processingInstruction");
2423
+ id_defaultHandler = rb_intern("default");
2424
+ id_unparsedEntityDeclHandler = rb_intern("unparsedEntityDecl");
2425
+ id_notationDeclHandler = rb_intern("notationDecl");
2426
+ id_externalEntityRefHandler = rb_intern("externalEntityRef");
2428
+ id_defaultExpandHandler = rb_intern("defaultExpand");
2429
+ id_commentHandler = rb_intern("comment");
2430
+ id_startCdataSectionHandler = rb_intern("startCdata");
2431
+ id_endCdataSectionHandler = rb_intern("endCdata");
2432
+ id_startNamespaceDeclHandler = rb_intern("startNamespaceDecl");
2433
+ id_endNamespaceDeclHandler = rb_intern("endNamespaceDecl");
2434
+ id_notStandaloneHandler = rb_intern("notStandalone");
2436
+#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
2437
+ id_startDoctypeDeclHandler = rb_intern("startDoctypeDecl");
2438
+ id_endDoctypeDeclHandler = rb_intern("endDoctypeDecl");
2440
+ id_unknownEncoding = rb_intern("unknownEncoding");
2441
+ id_convert = rb_intern("convert");
2442
+#ifdef HAVE_EXPAT_H
2443
+ id_elementDeclHandler = rb_intern("elementDecl");
2444
+ id_attlistDeclHandler = rb_intern("attlistDecl");
2445
+ id_xmlDeclHandler = rb_intern("xmlDecl");
2446
+ id_entityDeclHandler = rb_intern("entityDecl");
2449
+ id_externalParsedEntityDeclHandler = rb_intern("externalParsedEntityDecl");
2450
+ id_internalParsedEntityDeclHandler = rb_intern("internalParsedEntityDecl");
2452
+#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
2453
+ id_skippedEntityHandler = rb_intern("skippedEntity");