59
59
return Data_Wrap_Struct(klass, 0, dealloc, parser);
62
static VALUE make_exception(yaml_parser_t * parser, VALUE path)
66
line = parser->context_mark.line + 1;
67
column = parser->context_mark.column + 1;
69
return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6,
73
INT2NUM(parser->problem_offset),
74
parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil,
75
parser->context ? rb_usascii_str_new2(parser->context) : Qnil);
78
#ifdef HAVE_RUBY_ENCODING_H
79
static VALUE transcode_string(VALUE src, int * parser_encoding)
81
int utf8 = rb_utf8_encindex();
82
int utf16le = rb_enc_find_index("UTF16_LE");
83
int utf16be = rb_enc_find_index("UTF16_BE");
84
int source_encoding = rb_enc_get_index(src);
86
if (source_encoding == utf8) {
87
*parser_encoding = YAML_UTF8_ENCODING;
91
if (source_encoding == utf16le) {
92
*parser_encoding = YAML_UTF16LE_ENCODING;
96
if (source_encoding == utf16be) {
97
*parser_encoding = YAML_UTF16BE_ENCODING;
101
src = rb_str_export_to_enc(src, rb_utf8_encoding());
104
*parser_encoding = YAML_UTF8_ENCODING;
108
static VALUE transcode_io(VALUE src, int * parser_encoding)
110
VALUE io_external_encoding;
111
int io_external_enc_index;
113
io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0);
115
/* if no encoding is returned, assume ascii8bit. */
116
if (NIL_P(io_external_encoding)) {
117
io_external_enc_index = rb_ascii8bit_encindex();
119
io_external_enc_index = rb_to_encoding_index(io_external_encoding);
122
/* Treat US-ASCII as utf_8 */
123
if (io_external_enc_index == rb_usascii_encindex()) {
124
*parser_encoding = YAML_UTF8_ENCODING;
128
if (io_external_enc_index == rb_utf8_encindex()) {
129
*parser_encoding = YAML_UTF8_ENCODING;
133
if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) {
134
*parser_encoding = YAML_UTF16LE_ENCODING;
138
if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) {
139
*parser_encoding = YAML_UTF16BE_ENCODING;
143
/* Just guess on ASCII-8BIT */
144
if (io_external_enc_index == rb_ascii8bit_encindex()) {
145
*parser_encoding = YAML_ANY_ENCODING;
149
/* If the external encoding is something we don't know how to handle,
150
* fall back to YAML_ANY_ENCODING. */
151
*parser_encoding = YAML_ANY_ENCODING;
158
static VALUE protected_start_stream(VALUE pointer)
160
VALUE *args = (VALUE *)pointer;
161
return rb_funcall(args[0], id_start_stream, 1, args[1]);
164
static VALUE protected_start_document(VALUE pointer)
166
VALUE *args = (VALUE *)pointer;
167
return rb_funcall3(args[0], id_start_document, 3, args + 1);
170
static VALUE protected_end_document(VALUE pointer)
172
VALUE *args = (VALUE *)pointer;
173
return rb_funcall(args[0], id_end_document, 1, args[1]);
176
static VALUE protected_alias(VALUE pointer)
178
VALUE *args = (VALUE *)pointer;
179
return rb_funcall(args[0], id_alias, 1, args[1]);
182
static VALUE protected_scalar(VALUE pointer)
184
VALUE *args = (VALUE *)pointer;
185
return rb_funcall3(args[0], id_scalar, 6, args + 1);
188
static VALUE protected_start_sequence(VALUE pointer)
190
VALUE *args = (VALUE *)pointer;
191
return rb_funcall3(args[0], id_start_sequence, 4, args + 1);
194
static VALUE protected_end_sequence(VALUE handler)
196
return rb_funcall(handler, id_end_sequence, 0);
199
static VALUE protected_start_mapping(VALUE pointer)
201
VALUE *args = (VALUE *)pointer;
202
return rb_funcall3(args[0], id_start_mapping, 4, args + 1);
205
static VALUE protected_end_mapping(VALUE handler)
207
return rb_funcall(handler, id_end_mapping, 0);
210
static VALUE protected_empty(VALUE handler)
212
return rb_funcall(handler, id_empty, 0);
215
static VALUE protected_end_stream(VALUE handler)
217
return rb_funcall(handler, id_end_stream, 0);
64
222
* parser.parse(yaml)
69
227
* See Psych::Parser and Psych::Parser#handler
71
static VALUE parse(VALUE self, VALUE yaml)
229
static VALUE parse(int argc, VALUE *argv, VALUE self)
73
232
yaml_parser_t * parser;
74
233
yaml_event_t event;
237
int parser_encoding = YAML_ANY_ENCODING;
77
238
#ifdef HAVE_RUBY_ENCODING_H
78
239
int encoding = rb_utf8_encindex();
79
240
rb_encoding * internal_enc = rb_default_internal_encoding();
81
242
VALUE handler = rb_iv_get(self, "@handler");
244
if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
245
if(rb_respond_to(yaml, id_path))
246
path = rb_funcall(yaml, id_path, 0);
248
path = rb_str_new2("<unknown>");
83
251
Data_Get_Struct(self, yaml_parser_t, parser);
253
yaml_parser_delete(parser);
254
yaml_parser_initialize(parser);
85
256
if (OBJ_TAINTED(yaml)) tainted = 1;
87
if(rb_respond_to(yaml, id_read)) {
258
if (rb_respond_to(yaml, id_read)) {
259
#ifdef HAVE_RUBY_ENCODING_H
260
yaml = transcode_io(yaml, &parser_encoding);
261
yaml_parser_set_encoding(parser, parser_encoding);
88
263
yaml_parser_set_input(parser, io_reader, (void *)yaml);
89
264
if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
91
266
StringValue(yaml);
267
#ifdef HAVE_RUBY_ENCODING_H
268
yaml = transcode_string(yaml, &parser_encoding);
269
yaml_parser_set_encoding(parser, parser_encoding);
92
271
yaml_parser_set_input_string(
94
273
(const unsigned char *)RSTRING_PTR(yaml),
100
279
if(!yaml_parser_parse(parser, &event)) {
102
size_t line = parser->mark.line;
103
size_t column = parser->mark.column;
105
if(rb_respond_to(yaml, id_path))
106
path = rb_funcall(yaml, id_path, 0);
108
path = rb_str_new2("<unknown>");
282
exception = make_exception(parser, path);
110
283
yaml_parser_delete(parser);
111
284
yaml_parser_initialize(parser);
113
rb_raise(ePsychSyntaxError, "(%s): couldn't parse YAML at line %d column %d",
114
StringValuePtr(path),
115
(int)line, (int)column);
286
rb_exc_raise(exception);
118
289
switch(event.type) {
119
case YAML_STREAM_START_EVENT:
290
case YAML_STREAM_START_EVENT:
121
rb_funcall(handler, id_start_stream, 1,
122
INT2NUM((long)event.data.stream_start.encoding)
295
args[1] = INT2NUM((long)event.data.stream_start.encoding);
296
rb_protect(protected_start_stream, (VALUE)args, &state);
125
299
case YAML_DOCUMENT_START_EVENT:
127
302
/* Get a list of tag directives (if any) */
128
303
VALUE tag_directives = rb_ary_new();
129
304
/* Grab the document version */
161
336
rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
164
rb_funcall(handler, id_start_document, 3,
165
version, tag_directives,
166
event.data.document_start.implicit == 1 ? Qtrue : Qfalse
341
args[2] = tag_directives;
342
args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
343
rb_protect(protected_start_document, (VALUE)args, &state);
170
346
case YAML_DOCUMENT_END_EVENT:
171
rb_funcall(handler, id_end_document, 1,
172
event.data.document_end.implicit == 1 ? Qtrue : Qfalse
351
args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
352
rb_protect(protected_end_document, (VALUE)args, &state);
175
355
case YAML_ALIAS_EVENT:
177
358
VALUE alias = Qnil;
178
359
if(event.data.alias.anchor) {
179
360
alias = rb_str_new2((const char *)event.data.alias.anchor);
290
487
style = INT2NUM((long)event.data.mapping_start.style);
292
rb_funcall(handler, id_start_mapping, 4,
293
anchor, tag, implicit, style);
495
rb_protect(protected_start_mapping, (VALUE)args, &state);
296
498
case YAML_MAPPING_END_EVENT:
297
rb_funcall(handler, id_end_mapping, 0);
499
rb_protect(protected_end_mapping, handler, &state);
299
501
case YAML_NO_EVENT:
300
rb_funcall(handler, id_empty, 0);
502
rb_protect(protected_empty, handler, &state);
302
504
case YAML_STREAM_END_EVENT:
303
rb_funcall(handler, id_end_stream, 0);
505
rb_protect(protected_end_stream, handler, &state);
307
509
yaml_event_delete(&event);
510
if (state) rb_jump_tag(state);
376
556
/* UTF-16-BE Encoding with BOM */
377
557
rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
559
rb_require("psych/syntax_error");
379
560
ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
381
rb_define_method(cPsychParser, "parse", parse, 1);
562
rb_define_method(cPsychParser, "parse", parse, -1);
382
563
rb_define_method(cPsychParser, "mark", mark, 0);
383
rb_define_method(cPsychParser, "external_encoding=", set_external_encoding, 1);
385
565
id_read = rb_intern("read");
386
566
id_path = rb_intern("path");