2
#include "structmember.h"
3
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
6
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7
typedef int Py_ssize_t;
8
#define PY_SSIZE_T_MAX INT_MAX
9
#define PY_SSIZE_T_MIN INT_MIN
10
#define PyInt_FromSsize_t PyInt_FromLong
11
#define PyInt_AsSsize_t PyInt_AsLong
14
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
18
#define UNUSED __attribute__((__unused__))
23
#define DEFAULT_ENCODING "utf-8"
25
#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26
#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27
#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28
#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
30
static PyTypeObject PyScannerType;
31
static PyTypeObject PyEncoderType;
33
typedef struct _PyScannerObject {
37
PyObject *object_hook;
38
PyObject *parse_float;
40
PyObject *parse_constant;
43
static PyMemberDef scanner_members[] = {
44
{"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
45
{"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
46
{"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
47
{"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
48
{"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
49
{"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
53
typedef struct _PyEncoderObject {
59
PyObject *key_separator;
60
PyObject *item_separator;
67
static PyMemberDef encoder_members[] = {
68
{"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
69
{"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
70
{"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
71
{"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
72
{"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
73
{"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
74
{"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
75
{"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
80
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
82
ascii_escape_unicode(PyObject *pystr);
84
ascii_escape_str(PyObject *pystr);
86
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
87
void init_speedups(void);
89
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
91
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
93
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
95
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
97
scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
99
scanner_dealloc(PyObject *self);
101
scanner_clear(PyObject *self);
103
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
105
encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
107
encoder_dealloc(PyObject *self);
109
encoder_clear(PyObject *self);
111
encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
113
encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
115
encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
117
_encoded_const(PyObject *const);
119
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
121
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
123
_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
125
_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
127
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
129
/* True when c lies in the printable ASCII range ' '..'~' and is neither a
   backslash nor a double quote.  Each use of the argument is parenthesized
   so that an expression argument (e.g. x & 0xff) is compared as a whole
   instead of being regrouped by operator precedence.  c is still evaluated
   several times, so do not pass an expression with side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
130
/* Nonzero when c is one of the four characters tested here: space, tab,
   line feed or carriage return.  The comparisons run in that order. */
#define IS_WHITESPACE(c) \
    ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
132
#define MIN_EXPANSION 6
133
#ifdef Py_UNICODE_WIDE
134
#define MAX_EXPANSION (2 * MIN_EXPANSION)
136
#define MAX_EXPANSION MIN_EXPANSION
140
_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
142
/* PyObject to Py_ssize_t converter: stores PyInt_AsSsize_t(o) in *size_ptr.
   py_scanstring hands this function to PyArg_ParseTuple for its "O&" slot. */
143
*size_ptr = PyInt_AsSsize_t(o);
144
/* NOTE(review): the ';' directly after the condition makes this `if`
   control an empty statement, so the -1 / PyErr_Occurred() error check has
   no effect on whatever follows.  The statements after this line are not
   visible in this listing -- confirm the intended return values and drop
   the stray ';'. */
if (*size_ptr == -1 && PyErr_Occurred());
150
_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
152
/* Py_ssize_t to PyObject converter */
153
return PyInt_FromSsize_t(*size_ptr);
157
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
159
/* Escape unicode code point c to ASCII escape sequences
160
in char *output. output must have at least 12 bytes unused to
161
accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
162
output[chars++] = '\\';
164
case '\\': output[chars++] = (char)c; break;
165
case '"': output[chars++] = (char)c; break;
166
case '\b': output[chars++] = 'b'; break;
167
case '\f': output[chars++] = 'f'; break;
168
case '\n': output[chars++] = 'n'; break;
169
case '\r': output[chars++] = 'r'; break;
170
case '\t': output[chars++] = 't'; break;
172
#ifdef Py_UNICODE_WIDE
174
/* UTF-16 surrogate pair */
175
Py_UNICODE v = c - 0x10000;
176
c = 0xd800 | ((v >> 10) & 0x3ff);
177
output[chars++] = 'u';
178
output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
179
output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
180
output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
181
output[chars++] = "0123456789abcdef"[(c ) & 0xf];
182
c = 0xdc00 | (v & 0x3ff);
183
output[chars++] = '\\';
186
output[chars++] = 'u';
187
output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
188
output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
189
output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
190
output[chars++] = "0123456789abcdef"[(c ) & 0xf];
196
ascii_escape_unicode(PyObject *pystr)
198
/* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
200
Py_ssize_t input_chars;
201
Py_ssize_t output_size;
202
Py_ssize_t max_output_size;
206
Py_UNICODE *input_unicode;
208
input_chars = PyUnicode_GET_SIZE(pystr);
209
input_unicode = PyUnicode_AS_UNICODE(pystr);
211
/* One input char can expand to MAX_EXPANSION output chars; the initial size allows for 4 escapes of MIN_EXPANSION (6) chars each */
212
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
213
max_output_size = 2 + (input_chars * MAX_EXPANSION);
214
rval = PyString_FromStringAndSize(NULL, output_size);
218
output = PyString_AS_STRING(rval);
220
output[chars++] = '"';
221
for (i = 0; i < input_chars; i++) {
222
Py_UNICODE c = input_unicode[i];
224
output[chars++] = (char)c;
227
chars = ascii_escape_char(c, output, chars);
229
if (output_size - chars < (1 + MAX_EXPANSION)) {
230
/* Fewer than 1 + MAX_EXPANSION bytes remain, so double the buffer (capped at max_output_size) */
231
Py_ssize_t new_output_size = output_size * 2;
232
/* This is an upper bound */
233
if (new_output_size > max_output_size) {
234
new_output_size = max_output_size;
236
/* Make sure that the output size changed before resizing */
237
if (new_output_size != output_size) {
238
output_size = new_output_size;
239
if (_PyString_Resize(&rval, output_size) == -1) {
242
output = PyString_AS_STRING(rval);
246
output[chars++] = '"';
247
if (_PyString_Resize(&rval, chars) == -1) {
254
ascii_escape_str(PyObject *pystr)
256
/* Take a PyString pystr and return a new ASCII-only escaped PyString */
258
Py_ssize_t input_chars;
259
Py_ssize_t output_size;
265
input_chars = PyString_GET_SIZE(pystr);
266
input_str = PyString_AS_STRING(pystr);
268
/* Fast path for a string that's already ASCII */
269
for (i = 0; i < input_chars; i++) {
270
Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
272
/* Something needs escaping, so scan the rest of the string for non-ASCII bytes */
274
for (j = i; j < input_chars; j++) {
275
c = (Py_UNICODE)(unsigned char)input_str[j];
277
/* We hit a non-ASCII character, bail to unicode mode */
279
uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
283
rval = ascii_escape_unicode(uni);
292
if (i == input_chars) {
293
/* Input is already ASCII */
294
output_size = 2 + input_chars;
297
/* One char input can be up to 6 chars output, estimate 4 of these */
298
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
300
rval = PyString_FromStringAndSize(NULL, output_size);
304
output = PyString_AS_STRING(rval);
307
/* We know that everything up to i is ASCII already */
309
memcpy(&output[1], input_str, i);
311
for (; i < input_chars; i++) {
312
Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
314
output[chars++] = (char)c;
317
chars = ascii_escape_char(c, output, chars);
319
/* An ASCII char can't possibly expand to a surrogate! */
320
if (output_size - chars < (1 + MIN_EXPANSION)) {
321
/* There's more than four, so let's resize by a lot */
323
if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
324
output_size = 2 + (input_chars * MIN_EXPANSION);
326
if (_PyString_Resize(&rval, output_size) == -1) {
329
output = PyString_AS_STRING(rval);
332
output[chars++] = '"';
333
if (_PyString_Resize(&rval, chars) == -1) {
340
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
342
/* Use the Python function simplejson.decoder.errmsg to raise a nice
343
looking ValueError exception */
344
static PyObject *errmsg_fn = NULL;
346
if (errmsg_fn == NULL) {
347
PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
350
errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
352
if (errmsg_fn == NULL)
355
pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
357
PyErr_SetObject(PyExc_ValueError, pymsg);
363
join_list_unicode(PyObject *lst)
365
/* return u''.join(lst) */
366
static PyObject *joinfn = NULL;
367
if (joinfn == NULL) {
368
PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
372
joinfn = PyObject_GetAttrString(ustr, "join");
377
return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
381
join_list_string(PyObject *lst)
383
/* return ''.join(lst) */
384
static PyObject *joinfn = NULL;
385
if (joinfn == NULL) {
386
PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
390
joinfn = PyObject_GetAttrString(ustr, "join");
395
return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
399
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
400
/* return (rval, idx) tuple, stealing reference to rval */
404
steal a reference to rval, returns (rval, idx)
409
pyidx = PyInt_FromSsize_t(idx);
414
tpl = PyTuple_New(2);
420
PyTuple_SET_ITEM(tpl, 0, rval);
421
PyTuple_SET_ITEM(tpl, 1, pyidx);
426
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
428
/* Read the JSON string from PyString pystr.
429
end is the index of the first character after the quote.
430
encoding is the encoding of pystr (must be an ASCII superset)
431
if strict is zero then literal control characters are allowed
432
*next_end_ptr is a return-by-reference index of the character
435
Return value is a new PyString (if ASCII-only) or PyUnicode
438
Py_ssize_t len = PyString_GET_SIZE(pystr);
439
Py_ssize_t begin = end - 1;
440
Py_ssize_t next = begin;
442
char *buf = PyString_AS_STRING(pystr);
443
PyObject *chunks = PyList_New(0);
444
if (chunks == NULL) {
447
if (end < 0 || len <= end) {
448
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
452
/* Find the end of the string or the next escape */
454
PyObject *chunk = NULL;
455
for (next = end; next < len; next++) {
456
c = (unsigned char)buf[next];
457
if (c == '"' || c == '\\') {
460
else if (strict && c <= 0x1f) {
461
raise_errmsg("Invalid control character at", pystr, next);
468
if (!(c == '"' || c == '\\')) {
469
raise_errmsg("Unterminated string starting at", pystr, begin);
472
/* Pick up this chunk if it's not zero length */
474
PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
475
if (strchunk == NULL) {
479
chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
488
if (PyList_Append(chunks, chunk)) {
500
raise_errmsg("Unterminated string starting at", pystr, begin);
505
/* Non-unicode backslash escapes */
511
case 'b': c = '\b'; break;
512
case 'f': c = '\f'; break;
513
case 'n': c = '\n'; break;
514
case 'r': c = '\r'; break;
515
case 't': c = '\t'; break;
519
raise_errmsg("Invalid \\escape", pystr, end - 2);
528
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
531
/* Decode 4 hex digits */
532
for (; next < end; next++) {
533
Py_UNICODE digit = buf[next];
536
case '0': case '1': case '2': case '3': case '4':
537
case '5': case '6': case '7': case '8': case '9':
538
c |= (digit - '0'); break;
539
case 'a': case 'b': case 'c': case 'd': case 'e':
541
c |= (digit - 'a' + 10); break;
542
case 'A': case 'B': case 'C': case 'D': case 'E':
544
c |= (digit - 'A' + 10); break;
546
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
550
#ifdef Py_UNICODE_WIDE
552
if ((c & 0xfc00) == 0xd800) {
554
if (end + 6 >= len) {
555
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
558
if (buf[next++] != '\\' || buf[next++] != 'u') {
559
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
563
/* Decode 4 hex digits */
564
for (; next < end; next++) {
566
Py_UNICODE digit = buf[next];
568
case '0': case '1': case '2': case '3': case '4':
569
case '5': case '6': case '7': case '8': case '9':
570
c2 |= (digit - '0'); break;
571
case 'a': case 'b': case 'c': case 'd': case 'e':
573
c2 |= (digit - 'a' + 10); break;
574
case 'A': case 'B': case 'C': case 'D': case 'E':
576
c2 |= (digit - 'A' + 10); break;
578
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
582
if ((c2 & 0xfc00) != 0xdc00) {
583
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
586
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
588
else if ((c & 0xfc00) == 0xdc00) {
589
raise_errmsg("Unpaired low surrogate", pystr, end - 5);
598
chunk = PyUnicode_FromUnicode(&c, 1);
604
char c_char = Py_CHARMASK(c);
605
chunk = PyString_FromStringAndSize(&c_char, 1);
610
if (PyList_Append(chunks, chunk)) {
617
rval = join_list_string(chunks);
632
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
634
/* Read the JSON string from PyUnicode pystr.
635
end is the index of the first character after the quote.
636
if strict is zero then literal control characters are allowed
637
*next_end_ptr is a return-by-reference index of the character
640
Return value is a new PyUnicode
643
Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
644
Py_ssize_t begin = end - 1;
645
Py_ssize_t next = begin;
646
const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
647
PyObject *chunks = PyList_New(0);
648
if (chunks == NULL) {
651
if (end < 0 || len <= end) {
652
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
656
/* Find the end of the string or the next escape */
658
PyObject *chunk = NULL;
659
for (next = end; next < len; next++) {
661
if (c == '"' || c == '\\') {
664
else if (strict && c <= 0x1f) {
665
raise_errmsg("Invalid control character at", pystr, next);
669
if (!(c == '"' || c == '\\')) {
670
raise_errmsg("Unterminated string starting at", pystr, begin);
673
/* Pick up this chunk if it's not zero length */
675
chunk = PyUnicode_FromUnicode(&buf[end], next - end);
679
if (PyList_Append(chunks, chunk)) {
691
raise_errmsg("Unterminated string starting at", pystr, begin);
696
/* Non-unicode backslash escapes */
702
case 'b': c = '\b'; break;
703
case 'f': c = '\f'; break;
704
case 'n': c = '\n'; break;
705
case 'r': c = '\r'; break;
706
case 't': c = '\t'; break;
710
raise_errmsg("Invalid \\escape", pystr, end - 2);
719
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
722
/* Decode 4 hex digits */
723
for (; next < end; next++) {
724
Py_UNICODE digit = buf[next];
727
case '0': case '1': case '2': case '3': case '4':
728
case '5': case '6': case '7': case '8': case '9':
729
c |= (digit - '0'); break;
730
case 'a': case 'b': case 'c': case 'd': case 'e':
732
c |= (digit - 'a' + 10); break;
733
case 'A': case 'B': case 'C': case 'D': case 'E':
735
c |= (digit - 'A' + 10); break;
737
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
741
#ifdef Py_UNICODE_WIDE
743
if ((c & 0xfc00) == 0xd800) {
745
if (end + 6 >= len) {
746
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
749
if (buf[next++] != '\\' || buf[next++] != 'u') {
750
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
754
/* Decode 4 hex digits */
755
for (; next < end; next++) {
757
Py_UNICODE digit = buf[next];
759
case '0': case '1': case '2': case '3': case '4':
760
case '5': case '6': case '7': case '8': case '9':
761
c2 |= (digit - '0'); break;
762
case 'a': case 'b': case 'c': case 'd': case 'e':
764
c2 |= (digit - 'a' + 10); break;
765
case 'A': case 'B': case 'C': case 'D': case 'E':
767
c2 |= (digit - 'A' + 10); break;
769
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
773
if ((c2 & 0xfc00) != 0xdc00) {
774
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
777
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
779
else if ((c & 0xfc00) == 0xdc00) {
780
raise_errmsg("Unpaired low surrogate", pystr, end - 5);
785
chunk = PyUnicode_FromUnicode(&c, 1);
789
if (PyList_Append(chunks, chunk)) {
796
rval = join_list_unicode(chunks);
809
PyDoc_STRVAR(pydoc_scanstring,
810
"scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
812
"Scan the string s for a JSON string. End is the index of the\n"
813
"character in s after the quote that started the JSON string.\n"
814
"Unescapes all valid JSON string escape sequences and raises ValueError\n"
815
"on attempt to decode an invalid string. If strict is False then literal\n"
816
"control characters are allowed in the string.\n"
818
"Returns a tuple of the decoded string and the index of the character in s\n"
819
"after the end quote."
823
py_scanstring(PyObject* self UNUSED, PyObject *args)
828
Py_ssize_t next_end = -1;
829
char *encoding = NULL;
831
if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
834
if (encoding == NULL) {
835
encoding = DEFAULT_ENCODING;
837
if (PyString_Check(pystr)) {
838
rval = scanstring_str(pystr, end, encoding, strict, &next_end);
840
else if (PyUnicode_Check(pystr)) {
841
rval = scanstring_unicode(pystr, end, strict, &next_end);
844
PyErr_Format(PyExc_TypeError,
845
"first argument must be a string, not %.80s",
846
Py_TYPE(pystr)->tp_name);
849
return _build_rval_index_tuple(rval, next_end);
852
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
853
"encode_basestring_ascii(basestring) -> str\n"
855
"Return an ASCII-only JSON representation of a Python string"
859
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
861
/* Return an ASCII-only JSON representation of a Python string */
863
if (PyString_Check(pystr)) {
864
return ascii_escape_str(pystr);
866
else if (PyUnicode_Check(pystr)) {
867
return ascii_escape_unicode(pystr);
870
PyErr_Format(PyExc_TypeError,
871
"first argument must be a string, not %.80s",
872
Py_TYPE(pystr)->tp_name);
878
scanner_dealloc(PyObject *self)
880
/* Deallocate scanner object */
882
Py_TYPE(self)->tp_free(self);
886
scanner_traverse(PyObject *self, visitproc visit, void *arg)
889
assert(PyScanner_Check(self));
890
s = (PyScannerObject *)self;
891
Py_VISIT(s->encoding);
893
Py_VISIT(s->object_hook);
894
Py_VISIT(s->parse_float);
895
Py_VISIT(s->parse_int);
896
Py_VISIT(s->parse_constant);
901
scanner_clear(PyObject *self)
904
assert(PyScanner_Check(self));
905
s = (PyScannerObject *)self;
906
Py_CLEAR(s->encoding);
908
Py_CLEAR(s->object_hook);
909
Py_CLEAR(s->parse_float);
910
Py_CLEAR(s->parse_int);
911
Py_CLEAR(s->parse_constant);
916
_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
917
/* Read a JSON object from PyString pystr.
918
idx is the index of the first character after the opening curly brace.
919
*next_idx_ptr is a return-by-reference index to the first character after
920
the closing curly brace.
922
Returns a new PyObject (usually a dict, but object_hook can change that)
924
char *str = PyString_AS_STRING(pystr);
925
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
926
PyObject *rval = PyDict_New();
927
PyObject *key = NULL;
928
PyObject *val = NULL;
929
char *encoding = PyString_AS_STRING(s->encoding);
930
int strict = PyObject_IsTrue(s->strict);
935
/* skip whitespace after { */
936
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
938
/* only loop if the object is non-empty */
939
if (idx <= end_idx && str[idx] != '}') {
940
while (idx <= end_idx) {
942
if (str[idx] != '"') {
943
raise_errmsg("Expecting property name", pystr, idx);
946
key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
951
/* skip whitespace between key and : delimiter, read :, skip whitespace */
952
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
953
if (idx > end_idx || str[idx] != ':') {
954
raise_errmsg("Expecting : delimiter", pystr, idx);
958
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
960
/* read any JSON term */
961
val = scan_once_str(s, pystr, idx, &next_idx);
965
if (PyDict_SetItem(rval, key, val) == -1)
972
/* skip whitespace before } or , */
973
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
975
/* bail if the object is closed or we didn't get the , delimiter */
976
if (idx > end_idx) break;
977
if (str[idx] == '}') {
980
else if (str[idx] != ',') {
981
raise_errmsg("Expecting , delimiter", pystr, idx);
986
/* skip whitespace after , delimiter */
987
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
990
/* verify that idx <= end_idx and that str[idx] is '}' */
991
if (idx > end_idx || str[idx] != '}') {
992
raise_errmsg("Expecting object", pystr, end_idx);
995
/* if object_hook is not None: rval = object_hook(rval) */
996
if (s->object_hook != Py_None) {
997
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1004
*next_idx_ptr = idx + 1;
1014
_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1015
/* Read a JSON object from PyUnicode pystr.
1016
idx is the index of the first character after the opening curly brace.
1017
*next_idx_ptr is a return-by-reference index to the first character after
1018
the closing curly brace.
1020
Returns a new PyObject (usually a dict, but object_hook can change that)
1022
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1023
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1024
PyObject *val = NULL;
1025
PyObject *rval = PyDict_New();
1026
PyObject *key = NULL;
1027
int strict = PyObject_IsTrue(s->strict);
1028
Py_ssize_t next_idx;
1032
/* skip whitespace after { */
1033
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1035
/* only loop if the object is non-empty */
1036
if (idx <= end_idx && str[idx] != '}') {
1037
while (idx <= end_idx) {
1039
if (str[idx] != '"') {
1040
raise_errmsg("Expecting property name", pystr, idx);
1043
key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1048
/* skip whitespace between key and : delimiter, read :, skip whitespace */
1049
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1050
if (idx > end_idx || str[idx] != ':') {
1051
raise_errmsg("Expecting : delimiter", pystr, idx);
1055
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1057
/* read any JSON term */
1058
val = scan_once_unicode(s, pystr, idx, &next_idx);
1062
if (PyDict_SetItem(rval, key, val) == -1)
1069
/* skip whitespace before } or , */
1070
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1072
/* bail if the object is closed or we didn't get the , delimiter */
1073
if (idx > end_idx) break;
1074
if (str[idx] == '}') {
1077
else if (str[idx] != ',') {
1078
raise_errmsg("Expecting , delimiter", pystr, idx);
1083
/* skip whitespace after , delimiter */
1084
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1088
/* verify that idx <= end_idx and that str[idx] is '}' */
1089
if (idx > end_idx || str[idx] != '}') {
1090
raise_errmsg("Expecting object", pystr, end_idx);
1094
/* if object_hook is not None: rval = object_hook(rval) */
1095
if (s->object_hook != Py_None) {
1096
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1103
*next_idx_ptr = idx + 1;
1113
_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1114
/* Read a JSON array from PyString pystr.
1115
idx is the index of the first character after the opening bracket.
1116
*next_idx_ptr is a return-by-reference index to the first character after
1119
Returns a new PyList
1121
char *str = PyString_AS_STRING(pystr);
1122
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1123
PyObject *val = NULL;
1124
PyObject *rval = PyList_New(0);
1125
Py_ssize_t next_idx;
1129
/* skip whitespace after [ */
1130
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1132
/* only loop if the array is non-empty */
1133
if (idx <= end_idx && str[idx] != ']') {
1134
while (idx <= end_idx) {
1136
/* read any JSON term; the index after it comes back through next_idx */
1137
val = scan_once_str(s, pystr, idx, &next_idx);
1141
if (PyList_Append(rval, val) == -1)
1147
/* skip whitespace between term and , */
1148
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1150
/* bail if the array is closed or we didn't get the , delimiter */
1151
if (idx > end_idx) break;
1152
if (str[idx] == ']') {
1155
else if (str[idx] != ',') {
1156
raise_errmsg("Expecting , delimiter", pystr, idx);
1161
/* skip whitespace after , */
1162
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1166
/* verify that idx <= end_idx and that str[idx] is ']' */
1167
if (idx > end_idx || str[idx] != ']') {
1168
raise_errmsg("Expecting object", pystr, end_idx);
1171
*next_idx_ptr = idx + 1;
1180
_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1181
/* Read a JSON array from PyUnicode pystr.
1182
idx is the index of the first character after the opening bracket.
1183
*next_idx_ptr is a return-by-reference index to the first character after
1186
Returns a new PyList
1188
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1189
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1190
PyObject *val = NULL;
1191
PyObject *rval = PyList_New(0);
1192
Py_ssize_t next_idx;
1196
/* skip whitespace after [ */
1197
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1199
/* only loop if the array is non-empty */
1200
if (idx <= end_idx && str[idx] != ']') {
1201
while (idx <= end_idx) {
1203
/* read any JSON term */
1204
val = scan_once_unicode(s, pystr, idx, &next_idx);
1208
if (PyList_Append(rval, val) == -1)
1214
/* skip whitespace between term and , */
1215
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1217
/* bail if the array is closed or we didn't get the , delimiter */
1218
if (idx > end_idx) break;
1219
if (str[idx] == ']') {
1222
else if (str[idx] != ',') {
1223
raise_errmsg("Expecting , delimiter", pystr, idx);
1228
/* skip whitespace after , */
1229
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1233
/* verify that idx <= end_idx and that str[idx] is ']' */
1234
if (idx > end_idx || str[idx] != ']') {
1235
raise_errmsg("Expecting object", pystr, end_idx);
1238
*next_idx_ptr = idx + 1;
1247
_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1248
/* Handle a JSON constant that the caller has already matched.
1249
constant is the constant string that was found
1250
("NaN", "Infinity", "-Infinity").
1251
idx is the index of the first character of the constant
1252
*next_idx_ptr is a return-by-reference index to the first character after
1255
Returns the result of parse_constant
1259
/* constant is "NaN", "Infinity", or "-Infinity" */
1260
cstr = PyString_InternFromString(constant);
1264
/* rval = parse_constant(constant) */
1265
rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1266
idx += PyString_GET_SIZE(cstr);
1268
*next_idx_ptr = idx;
1273
_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1274
/* Read a JSON number from PyString pystr.
1275
start is the index of the first character of the number
1276
*next_idx_ptr is a return-by-reference index to the first character after
1279
Returns a new PyObject representation of that number:
1280
PyInt, PyLong, or PyFloat.
1281
May return other types if parse_int or parse_float are set
1283
char *str = PyString_AS_STRING(pystr);
1284
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1285
Py_ssize_t idx = start;
1290
/* read a sign if it's there, make sure it's not the end of the string */
1291
if (str[idx] == '-') {
1293
if (idx > end_idx) {
1294
PyErr_SetNone(PyExc_StopIteration);
1299
/* read as many integer digits as we find as long as it doesn't start with 0 */
1300
if (str[idx] >= '1' && str[idx] <= '9') {
1302
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1304
/* if it starts with 0 we only expect one integer digit */
1305
else if (str[idx] == '0') {
1308
/* no integer digits, error */
1310
PyErr_SetNone(PyExc_StopIteration);
1314
/* if the next char is '.' followed by a digit then read all float digits */
1315
if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1318
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1321
/* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1322
if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1324
/* save the index of the 'e' or 'E' just in case we need to backtrack */
1325
Py_ssize_t e_start = idx;
1328
/* read an exponent sign if present */
1329
if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1331
/* read all digits */
1332
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1334
/* if we got a digit, then parse as float. if not, backtrack */
1335
if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1343
/* copy the section we determined to be a number */
1344
numstr = PyString_FromStringAndSize(&str[start], idx - start);
1348
/* parse as a float using a fast path if available, otherwise call user defined method */
1349
if (s->parse_float != (PyObject *)&PyFloat_Type) {
1350
rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1353
rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr)));
1357
/* parse as an int using a fast path if available, otherwise call user defined method */
1358
if (s->parse_int != (PyObject *)&PyInt_Type) {
1359
rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1362
rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1366
*next_idx_ptr = idx;
1371
_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1372
/* Read a JSON number from PyUnicode pystr.
1373
start is the index of the first character of the number
1374
*next_idx_ptr is a return-by-reference index to the first character after
1377
Returns a new PyObject representation of that number:
1378
PyInt, PyLong, or PyFloat.
1379
May return other types if parse_int or parse_float are set
1381
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1382
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1383
Py_ssize_t idx = start;
1388
/* read a sign if it's there, make sure it's not the end of the string */
1389
if (str[idx] == '-') {
1391
if (idx > end_idx) {
1392
PyErr_SetNone(PyExc_StopIteration);
1397
/* read as many integer digits as we find as long as it doesn't start with 0 */
1398
if (str[idx] >= '1' && str[idx] <= '9') {
1400
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1402
/* if it starts with 0 we only expect one integer digit */
1403
else if (str[idx] == '0') {
1406
/* no integer digits, error */
1408
PyErr_SetNone(PyExc_StopIteration);
1412
/* if the next char is '.' followed by a digit then read all float digits */
1413
if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1416
/* NOTE(review): this loop stops at idx < end_idx, whereas the matching
   fraction-digit loop in _match_number_str uses idx <= end_idx.  As written
   the loop never advances past a digit located exactly at end_idx --
   confirm against the str variant and align the bound. */
while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1419
/* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1420
if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1421
/* remember where the 'e' or 'E' is in case we need to backtrack */
Py_ssize_t e_start = idx;
1424
/* read an exponent sign if present */
1425
if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1427
/* read all digits */
1428
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1430
/* if we got a digit, then parse as float. if not, backtrack */
1431
if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1439
/* copy the section we determined to be a number */
1440
numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1444
/* parse as a float using a fast path if available, otherwise call user defined method */
1445
if (s->parse_float != (PyObject *)&PyFloat_Type) {
1446
rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1449
rval = PyFloat_FromString(numstr, NULL);
1453
/* no fast path for unicode -> int, just call */
1454
rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1457
/* report the index just past the number to the caller */
*next_idx_ptr = idx;
1462
/* scan_once_str: scan a single JSON term out of the byte string pystr at idx. */
/* Sets StopIteration when idx is at or beyond the end of pystr. */
/* Hands off to scanstring_str, _parse_object_str and _parse_array_str with idx + 1, */
/* i.e. the position just after the character at idx. */
/* The literal checks compare the characters following str[idx] with the tails */
/* "ull", "rue", "alse", "aN", "nfinity" and "Infinity", each bounds-checked against length. */
/* null, true and false store idx plus the literal's length in *next_idx_ptr; */
/* NaN, Infinity and -Infinity are delegated to _parse_constant. */
/* Whatever is left is passed to _match_number_str. */
/* s->strict is reduced to a truth value with PyObject_IsTrue before being passed on. */
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1464
/* Read one JSON term (of any kind) from PyString pystr.
1465
idx is the index of the first character of the term
1466
*next_idx_ptr is a return-by-reference index to the first character after
1469
Returns a new PyObject representation of the term.
1471
char *str = PyString_AS_STRING(pystr);
1472
Py_ssize_t length = PyString_GET_SIZE(pystr);
1473
if (idx >= length) {
1474
PyErr_SetNone(PyExc_StopIteration);
1480
return scanstring_str(pystr, idx + 1,
1481
PyString_AS_STRING(s->encoding),
1482
PyObject_IsTrue(s->strict),
1486
return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1489
return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1492
if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1494
*next_idx_ptr = idx + 4;
1500
if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1502
*next_idx_ptr = idx + 4;
1508
if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1509
Py_INCREF(Py_False);
1510
*next_idx_ptr = idx + 5;
1516
if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1517
return _parse_constant(s, "NaN", idx, next_idx_ptr);
1522
if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1523
return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1528
if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1529
return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1533
/* Didn't find a string, object, array, or named constant. Look for a number. */
1534
return _match_number_str(s, pystr, idx, next_idx_ptr);
1538
/* scan_once_unicode: scan a single JSON term out of the unicode object pystr at idx. */
/* Mirrors scan_once_str but reads Py_UNICODE code units and calls the *_unicode helpers. */
/* Sets StopIteration when idx is at or beyond the end of pystr. */
/* scanstring_unicode, _parse_object_unicode and _parse_array_unicode receive idx + 1, */
/* i.e. the position just after the character at idx. */
/* null, true and false store idx plus the literal's length in *next_idx_ptr; */
/* NaN, Infinity and -Infinity are delegated to _parse_constant. */
/* Whatever is left is passed to _match_number_unicode. */
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1540
/* Read one JSON term (of any kind) from PyUnicode pystr.
1541
idx is the index of the first character of the term
1542
*next_idx_ptr is a return-by-reference index to the first character after
1545
Returns a new PyObject representation of the term.
1547
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1548
Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1549
if (idx >= length) {
1550
PyErr_SetNone(PyExc_StopIteration);
1556
return scanstring_unicode(pystr, idx + 1,
1557
PyObject_IsTrue(s->strict),
1561
return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1564
return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1567
if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1569
*next_idx_ptr = idx + 4;
1575
if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1577
*next_idx_ptr = idx + 4;
1583
if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1584
Py_INCREF(Py_False);
1585
*next_idx_ptr = idx + 5;
1591
if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1592
return _parse_constant(s, "NaN", idx, next_idx_ptr);
1597
if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1598
return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1603
if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1604
return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1608
/* Didn't find a string, object, array, or named constant. Look for a number. */
1609
return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1613
/* scanner_call: tp_call handler of the scanner type, i.e. scanner(string, idx). */
/* Accepts the keyword names "string" and "idx"; idx is converted to a Py_ssize_t */
/* through the _convertPyInt_AsSsize_t converter. */
scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1615
/* Python callable interface to scan_once_{str,unicode} */
1619
/* next_idx starts at -1 and is filled in by the scan_once_* helper. */
Py_ssize_t next_idx = -1;
1620
static char *kwlist[] = {"string", "idx", NULL};
1622
assert(PyScanner_Check(self));
1623
s = (PyScannerObject *)self;
1624
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1627
/* Dispatch on the concrete string type of the input. */
if (PyString_Check(pystr)) {
1628
rval = scan_once_str(s, pystr, idx, &next_idx);
1630
else if (PyUnicode_Check(pystr)) {
1631
rval = scan_once_unicode(s, pystr, idx, &next_idx);
1634
/* Neither str nor unicode: report the offending type name (truncated to 80 chars). */
PyErr_Format(PyExc_TypeError,
1635
"first argument must be a string, not %.80s",
1636
Py_TYPE(pystr)->tp_name);
1639
/* The parsed value and the index after it are packaged by _build_rval_index_tuple. */
return _build_rval_index_tuple(rval, next_idx);
1643
/* scanner_new: allocate a scanner through type->tp_alloc and reset its object fields to NULL. */
/* Returns the new instance cast to PyObject *. */
/* NOTE(review): the module-init code assigns PyScannerType.tp_new = PyType_GenericNew, */
/* which replaces the scanner_new entry in the static type initializer - confirm this */
/* function is still reachable. */
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1646
s = (PyScannerObject *)type->tp_alloc(type, 0);
1650
/* Presumably NULLed so that clearing a not-yet-initialized scanner is harmless - confirm. */
s->object_hook = NULL;
1651
s->parse_float = NULL;
1652
s->parse_int = NULL;
1653
s->parse_constant = NULL;
1655
return (PyObject *)s;
1659
/* scanner_init: tp_init handler; configures a scanner from a single "context" object. */
/* Reads the attributes encoding, strict, object_hook, parse_float, parse_int and */
/* parse_constant from ctx with PyObject_GetAttrString and stores them on the scanner. */
scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1661
/* Initialize Scanner object */
1663
static char *kwlist[] = {"context", NULL};
1666
assert(PyScanner_Check(self));
1667
s = (PyScannerObject *)self;
1669
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1672
/* PyString_AS_STRING is used on encoding */
1673
s->encoding = PyObject_GetAttrString(ctx, "encoding");
1674
/* An encoding of None is replaced by the interned DEFAULT_ENCODING string. */
/* NOTE(review): PyObject_GetAttrString can return NULL, but the only NULL test on */
/* s->encoding comes after PyUnicode_Check(s->encoding) below - confirm that a context */
/* without an "encoding" attribute cannot reach that check with a NULL pointer. */
if (s->encoding == Py_None) {
1676
s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1678
/* A unicode encoding name is converted to a byte string using the default codec */
/* (both the encoding and errors arguments are NULL). */
else if (PyUnicode_Check(s->encoding)) {
1679
PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1680
Py_DECREF(s->encoding);
1683
/* At this point the encoding must be a non-NULL byte string. */
if (s->encoding == NULL || !PyString_Check(s->encoding))
1686
/* All of these will fail "gracefully" so we don't need to verify them */
1687
s->strict = PyObject_GetAttrString(ctx, "strict");
1688
if (s->strict == NULL)
1690
s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1691
if (s->object_hook == NULL)
1693
s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1694
if (s->parse_float == NULL)
1696
s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1697
if (s->parse_int == NULL)
1699
s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1700
if (s->parse_constant == NULL)
1706
/* Releases every reference stored above; presumably the failure path - confirm. */
Py_CLEAR(s->encoding);
1707
Py_CLEAR(s->strict);
1708
Py_CLEAR(s->object_hook);
1709
Py_CLEAR(s->parse_float);
1710
Py_CLEAR(s->parse_int);
1711
Py_CLEAR(s->parse_constant);
1715
/* Docstring and type object for the scanner, named "simplejson._speedups.Scanner". */
/* Instances are callable through scanner_call, are tracked by the cyclic garbage */
/* collector (Py_TPFLAGS_HAVE_GC with scanner_traverse / scanner_clear) and expose */
/* their fields through scanner_members. */
/* Slots written as 0 with a commented-out name are left unset in this initializer. */
PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1718
PyTypeObject PyScannerType = {
1719
PyObject_HEAD_INIT(NULL)
1720
0, /* tp_internal */
1721
"simplejson._speedups.Scanner", /* tp_name */
1722
sizeof(PyScannerObject), /* tp_basicsize */
1723
0, /* tp_itemsize */
1724
scanner_dealloc, /* tp_dealloc */
1730
0, /* tp_as_number */
1731
0, /* tp_as_sequence */
1732
0, /* tp_as_mapping */
1734
scanner_call, /* tp_call */
1736
0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1737
0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1738
0, /* tp_as_buffer */
1739
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1740
scanner_doc, /* tp_doc */
1741
scanner_traverse, /* tp_traverse */
1742
scanner_clear, /* tp_clear */
1743
0, /* tp_richcompare */
1744
0, /* tp_weaklistoffset */
1746
0, /* tp_iternext */
1748
scanner_members, /* tp_members */
1752
0, /* tp_descr_get */
1753
0, /* tp_descr_set */
1754
0, /* tp_dictoffset */
1755
scanner_init, /* tp_init */
1756
0,/* PyType_GenericAlloc, */ /* tp_alloc */
1757
/* NOTE(review): the module-init code later assigns PyScannerType.tp_new = */
/* PyType_GenericNew, replacing this entry - confirm which constructor is intended. */
scanner_new, /* tp_new */
1758
0,/* PyObject_GC_Del, */ /* tp_free */
1762
/* encoder_new: allocate an encoder through type->tp_alloc and reset its object fields to NULL. */
/* Returns the new instance cast to PyObject *. */
/* NOTE(review): the module-init code assigns PyEncoderType.tp_new = PyType_GenericNew, */
/* which replaces the encoder_new entry in the static type initializer - confirm this */
/* function is still reachable. */
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1765
s = (PyEncoderObject *)type->tp_alloc(type, 0);
1768
s->defaultfn = NULL;
1771
s->key_separator = NULL;
1772
s->item_separator = NULL;
1773
s->sort_keys = NULL;
1776
return (PyObject *)s;
1780
/* encoder_init: tp_init handler; stores the nine encoder settings on the instance. */
/* Arguments, in order: markers, default, encoder, indent, key_separator, */
/* item_separator, sort_keys, skipkeys, allow_nan. */
encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1782
/* initialize Encoder object */
1783
static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1786
PyObject *allow_nan;
1788
assert(PyEncoder_Check(self));
1789
s = (PyEncoderObject *)self;
1791
/* The "O" format writes borrowed references directly into the struct fields. */
/* NOTE(review): whatever the fields held before is overwritten without a release - */
/* confirm this is never run on an already-initialized encoder. */
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1792
&s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan))
1795
/* Turn the borrowed references into owned ones. */
Py_INCREF(s->markers);
1796
Py_INCREF(s->defaultfn);
1797
Py_INCREF(s->encoder);
1798
Py_INCREF(s->indent);
1799
Py_INCREF(s->key_separator);
1800
Py_INCREF(s->item_separator);
1801
Py_INCREF(s->sort_keys);
1802
Py_INCREF(s->skipkeys);
1803
/* fast_encode is set when the encoder callable is the C function py_encode_basestring_ascii. */
s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1804
/* allow_nan is kept as a truth value instead of an object reference. */
/* NOTE(review): PyObject_IsTrue returns -1 on error and the result is stored as is - confirm. */
s->allow_nan = PyObject_IsTrue(allow_nan);
1809
/* encoder_call: tp_call handler of the encoder type, i.e. encoder(obj, _current_indent_level). */
/* The indent level is converted to a Py_ssize_t through _convertPyInt_AsSsize_t. */
encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1811
/* Python callable interface to encode_listencode_obj */
1812
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1815
Py_ssize_t indent_level;
1817
assert(PyEncoder_Check(self));
1818
s = (PyEncoderObject *)self;
1819
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1820
&obj, _convertPyInt_AsSsize_t, &indent_level))
1822
/* Output chunks are collected in a fresh list that encoder_listencode_obj appends to. */
rval = PyList_New(0);
1825
/* A non-zero result from encoder_listencode_obj signals failure. */
if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1833
/* _encoded_const: map the singletons None, True and False to "null", "true" and "false". */
/* Each string is created once with PyString_InternFromString and cached in a */
/* function-local static; any other object sets ValueError("not a const"). */
_encoded_const(PyObject *obj)
1835
/* Return the JSON string representation of None, True, False */
1836
if (obj == Py_None) {
1837
static PyObject *s_null = NULL;
1838
/* Lazily build the cached string on first use. */
if (s_null == NULL) {
1839
s_null = PyString_InternFromString("null");
1844
else if (obj == Py_True) {
1845
static PyObject *s_true = NULL;
1846
if (s_true == NULL) {
1847
s_true = PyString_InternFromString("true");
1852
else if (obj == Py_False) {
1853
static PyObject *s_false = NULL;
1854
if (s_false == NULL) {
1855
s_false = PyString_InternFromString("false");
1861
PyErr_SetString(PyExc_ValueError, "not a const");
1867
/* encoder_encode_float: produce the JSON text for the float object obj. */
/* Finite values are rendered with PyObject_Repr. Non-finite values set ValueError */
/* when s->allow_nan is false; otherwise one of the strings "Infinity", "-Infinity" */
/* or "NaN" is returned. */
/* obj is read with PyFloat_AS_DOUBLE, which performs no type check. */
encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1869
/* Return the JSON representation of a PyFloat */
1870
double i = PyFloat_AS_DOUBLE(obj);
1871
/* Py_IS_FINITE is false for both infinities and for NaN. */
if (!Py_IS_FINITE(i)) {
1872
if (!s->allow_nan) {
1873
PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1877
return PyString_FromString("Infinity");
1880
return PyString_FromString("-Infinity");
1883
return PyString_FromString("NaN");
1886
/* Use a better float format here? */
1887
return PyObject_Repr(obj);
1891
/* encoder_encode_string: produce the JSON text for the string object obj. */
/* Two paths are visible: a direct call to the C function py_encode_basestring_ascii */
/* and a call to the user-supplied s->encoder callable. */
/* Presumably the choice is driven by s->fast_encode - confirm. */
encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1893
/* Return the JSON representation of a string */
1895
return py_encode_basestring_ascii(NULL, obj);
1897
return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1901
/* _steal_list_append: append stolen to the list lst, taking over the caller's reference. */
/* rval holds the PyList_Append result (0 on success, -1 on failure). */
_steal_list_append(PyObject *lst, PyObject *stolen)
1903
/* Append stolen and then decrement its reference count */
1904
int rval = PyList_Append(lst, stolen);
1910
/* encoder_listencode_obj: encode obj as JSON, appending the output chunks to the list rval. */
/* Dispatches on the type of obj: constants, strings, integers, floats, lists and tuples, dicts, */
/* and finally arbitrary objects, which are converted with s->defaultfn and encoded recursively. */
encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1912
/* Encode Python object obj to a JSON term, rval is a PyList */
1916
/* The singletons are tested by identity before the integer check further down. */
if (obj == Py_None || obj == Py_True || obj == Py_False) {
1917
PyObject *cstr = _encoded_const(obj);
1920
return _steal_list_append(rval, cstr);
1922
else if (PyString_Check(obj) || PyUnicode_Check(obj))
1924
PyObject *encoded = encoder_encode_string(s, obj);
1925
if (encoded == NULL)
1927
return _steal_list_append(rval, encoded);
1929
/* Integers are rendered with their str() form. */
else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1930
PyObject *encoded = PyObject_Str(obj);
1931
if (encoded == NULL)
1933
return _steal_list_append(rval, encoded);
1935
else if (PyFloat_Check(obj)) {
1936
PyObject *encoded = encoder_encode_float(s, obj);
1937
if (encoded == NULL)
1939
return _steal_list_append(rval, encoded);
1941
/* Containers are handled by the dedicated list and dict encoders. */
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1942
return encoder_listencode_list(s, rval, obj, indent_level);
1944
else if (PyDict_Check(obj)) {
1945
return encoder_listencode_dict(s, rval, obj, indent_level);
1948
PyObject *ident = NULL;
1949
/* When a markers dict is in use, the address of obj serves as the key for cycle detection. */
if (s->markers != Py_None) {
1951
ident = PyLong_FromVoidPtr(obj);
1954
has_key = PyDict_Contains(s->markers, ident);
1957
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1961
if (PyDict_SetItem(s->markers, ident, obj)) {
1966
/* Let the user-supplied default function produce a substitute object. */
newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1967
if (newobj == NULL) {
1971
rv = encoder_listencode_obj(s, rval, newobj, indent_level);
1977
/* Remove the marker again after the substitute has been encoded. */
if (ident != NULL) {
1978
if (PyDict_DelItem(s->markers, ident)) {
1989
/* encoder_listencode_dict: encode the dict dct as a JSON object, appending chunks to rval. */
/* Keys that are not strings are converted: floats through encoder_encode_float, */
/* integers through PyObject_Str and True/False/None through _encoded_const. */
/* A ValueError ("keys must be a string") is raised on a visible path for other keys; */
/* the skipkeys branch precedes it. */
encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
1991
/* Encode Python dict dct to a JSON term, rval is a PyList */
1992
/* Interned punctuation strings, created on first use and cached in statics. */
static PyObject *open_dict = NULL;
1993
static PyObject *close_dict = NULL;
1994
static PyObject *empty_dict = NULL;
1995
PyObject *kstr = NULL;
1996
PyObject *ident = NULL;
1997
PyObject *key, *value;
2002
if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2003
open_dict = PyString_InternFromString("{");
2004
close_dict = PyString_InternFromString("}");
2005
empty_dict = PyString_InternFromString("{}");
2006
if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2009
/* An empty dict is emitted as the single chunk "{}". */
if (PyDict_Size(dct) == 0)
2010
return PyList_Append(rval, empty_dict);
2012
/* When a markers dict is in use, the address of dct serves as the key for cycle detection. */
if (s->markers != Py_None) {
2014
ident = PyLong_FromVoidPtr(dct);
2017
has_key = PyDict_Contains(s->markers, ident);
2020
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2023
if (PyDict_SetItem(s->markers, ident, dct)) {
2028
if (PyList_Append(rval, open_dict))
2031
if (s->indent != Py_None) {
2032
/* TODO: DOES NOT RUN */
2035
/* NOTE(review): the next three lines read as Python pseudo-code rather than C; */
/* presumably a sketch of the unimplemented indent handling - confirm. */
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2036
separator = _item_separator + newline_indent
2037
buf += newline_indent
2041
/* TODO: C speedup not implemented for sort_keys */
2044
skipkeys = PyObject_IsTrue(s->skipkeys);
2046
/* Walk the dict with PyDict_Next; key and value are borrowed references. */
while (PyDict_Next(dct, &pos, &key, &value)) {
2049
if (PyString_Check(key) || PyUnicode_Check(key)) {
2053
else if (PyFloat_Check(key)) {
2054
kstr = encoder_encode_float(s, key);
2058
else if (PyInt_Check(key) || PyLong_Check(key)) {
2059
kstr = PyObject_Str(key);
2063
else if (key == Py_True || key == Py_False || key == Py_None) {
2064
kstr = _encoded_const(key);
2068
else if (skipkeys) {
2072
/* TODO: include repr of key */
2073
PyErr_SetString(PyExc_ValueError, "keys must be a string");
2078
if (PyList_Append(rval, s->item_separator))
2082
/* The key text is always run through the string encoder before being emitted. */
encoded = encoder_encode_string(s, kstr);
2084
if (encoded == NULL)
2086
if (PyList_Append(rval, encoded)) {
2091
if (PyList_Append(rval, s->key_separator))
2093
if (encoder_listencode_obj(s, rval, value, indent_level))
2097
/* Remove the cycle-detection marker. */
if (ident != NULL) {
2098
if (PyDict_DelItem(s->markers, ident))
2102
if (s->indent != Py_None) {
2103
/* TODO: DOES NOT RUN */
2106
/* NOTE(review): the next line reads as Python pseudo-code rather than C - confirm. */
yield '\n' + (' ' * (_indent * _current_indent_level))
2109
if (PyList_Append(rval, close_dict))
2121
/* encoder_listencode_list: encode the sequence seq as a JSON array, appending chunks to rval. */
/* seq is first normalised with PySequence_Fast so its items can be read as a C array. */
encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2123
/* Encode Python list seq to a JSON term, rval is a PyList */
2124
/* Interned punctuation strings, created on first use and cached in statics. */
static PyObject *open_array = NULL;
2125
static PyObject *close_array = NULL;
2126
static PyObject *empty_array = NULL;
2127
PyObject *ident = NULL;
2128
PyObject *s_fast = NULL;
2129
Py_ssize_t num_items;
2130
PyObject **seq_items;
2133
if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2134
open_array = PyString_InternFromString("[");
2135
close_array = PyString_InternFromString("]");
2136
empty_array = PyString_InternFromString("[]");
2137
if (open_array == NULL || close_array == NULL || empty_array == NULL)
2141
/* The message is used for the TypeError raised when seq is not iterable. */
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2144
num_items = PySequence_Fast_GET_SIZE(s_fast);
2145
/* An empty sequence is emitted as the single chunk "[]". */
if (num_items == 0) {
2147
return PyList_Append(rval, empty_array);
2150
/* When a markers dict is in use, the address of seq serves as the key for cycle detection. */
if (s->markers != Py_None) {
2152
ident = PyLong_FromVoidPtr(seq);
2155
has_key = PyDict_Contains(s->markers, ident);
2158
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2161
if (PyDict_SetItem(s->markers, ident, seq)) {
2166
seq_items = PySequence_Fast_ITEMS(s_fast);
2167
if (PyList_Append(rval, open_array))
2169
if (s->indent != Py_None) {
2170
/* TODO: DOES NOT RUN */
2173
/* NOTE(review): the next three lines read as Python pseudo-code rather than C; */
/* presumably a sketch of the unimplemented indent handling - confirm. */
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2174
separator = _item_separator + newline_indent
2175
buf += newline_indent
2178
/* Each element is encoded recursively; s->item_separator is appended between elements. */
for (i = 0; i < num_items; i++) {
2179
PyObject *obj = seq_items[i];
2181
if (PyList_Append(rval, s->item_separator))
2184
if (encoder_listencode_obj(s, rval, obj, indent_level))
2187
/* Remove the cycle-detection marker. */
if (ident != NULL) {
2188
if (PyDict_DelItem(s->markers, ident))
2192
if (s->indent != Py_None) {
2193
/* TODO: DOES NOT RUN */
2196
/* NOTE(review): the next line reads as Python pseudo-code rather than C - confirm. */
yield '\n' + (' ' * (_indent * _current_indent_level))
2199
if (PyList_Append(rval, close_array))
2211
/* encoder_dealloc: tp_dealloc handler; drops the held references through encoder_clear */
/* and then releases the object memory with the type's tp_free. */
encoder_dealloc(PyObject *self)
2213
/* Deallocate Encoder */
2214
encoder_clear(self);
2215
Py_TYPE(self)->tp_free(self);
2219
/* encoder_traverse: tp_traverse handler; reports every object reference held by the */
/* encoder to the garbage collector's visit callback. */
/* The visited fields are the same eight that encoder_clear releases. */
encoder_traverse(PyObject *self, visitproc visit, void *arg)
2222
assert(PyEncoder_Check(self));
2223
s = (PyEncoderObject *)self;
2224
Py_VISIT(s->markers);
2225
Py_VISIT(s->defaultfn);
2226
Py_VISIT(s->encoder);
2227
Py_VISIT(s->indent);
2228
Py_VISIT(s->key_separator);
2229
Py_VISIT(s->item_separator);
2230
Py_VISIT(s->sort_keys);
2231
Py_VISIT(s->skipkeys);
2236
/* encoder_clear: tp_clear handler; releases every object reference held by the encoder. */
/* Py_CLEAR sets each field to NULL as it drops the reference, so repeated calls are harmless. */
/* Also called from encoder_dealloc. */
encoder_clear(PyObject *self)
2238
/* Clear the references held by the Encoder */
2240
assert(PyEncoder_Check(self));
2241
s = (PyEncoderObject *)self;
2242
Py_CLEAR(s->markers);
2243
Py_CLEAR(s->defaultfn);
2244
Py_CLEAR(s->encoder);
2245
Py_CLEAR(s->indent);
2246
Py_CLEAR(s->key_separator);
2247
Py_CLEAR(s->item_separator);
2248
Py_CLEAR(s->sort_keys);
2249
Py_CLEAR(s->skipkeys);
2253
/* Docstring and type object for the encoder, named "simplejson._speedups.Encoder". */
/* Instances are callable through encoder_call, are tracked by the cyclic garbage */
/* collector (Py_TPFLAGS_HAVE_GC with encoder_traverse / encoder_clear) and expose */
/* their fields through encoder_members. */
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2256
PyTypeObject PyEncoderType = {
2257
PyObject_HEAD_INIT(NULL)
2258
0, /* tp_internal */
2259
"simplejson._speedups.Encoder", /* tp_name */
2260
sizeof(PyEncoderObject), /* tp_basicsize */
2261
0, /* tp_itemsize */
2262
encoder_dealloc, /* tp_dealloc */
2268
0, /* tp_as_number */
2269
0, /* tp_as_sequence */
2270
0, /* tp_as_mapping */
2272
encoder_call, /* tp_call */
2274
0, /* tp_getattro */
2275
0, /* tp_setattro */
2276
0, /* tp_as_buffer */
2277
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2278
encoder_doc, /* tp_doc */
2279
encoder_traverse, /* tp_traverse */
2280
encoder_clear, /* tp_clear */
2281
0, /* tp_richcompare */
2282
0, /* tp_weaklistoffset */
2284
0, /* tp_iternext */
2286
encoder_members, /* tp_members */
2290
0, /* tp_descr_get */
2291
0, /* tp_descr_set */
2292
0, /* tp_dictoffset */
2293
encoder_init, /* tp_init */
2295
/* NOTE(review): the module-init code later assigns PyEncoderType.tp_new = */
/* PyType_GenericNew, replacing this entry - confirm which constructor is intended. */
encoder_new, /* tp_new */
2299
/* Module-level function table passed to Py_InitModule3. */
/* Visible entries wrap py_encode_basestring_ascii (as "encode_basestring_ascii") */
/* and py_scanstring; the all-NULL record is the sentinel that terminates the table. */
static PyMethodDef speedups_methods[] = {
2300
{"encode_basestring_ascii",
2301
(PyCFunction)py_encode_basestring_ascii,
2303
pydoc_encode_basestring_ascii},
2305
(PyCFunction)py_scanstring,
2308
{NULL, NULL, 0, NULL}
2311
/* Module docstring, handed to Py_InitModule3 together with speedups_methods. */
PyDoc_STRVAR(module_doc,
2312
"simplejson speedups\n");
2318
PyScannerType.tp_new = PyType_GenericNew;
2319
if (PyType_Ready(&PyScannerType) < 0)
2321
PyEncoderType.tp_new = PyType_GenericNew;
2322
if (PyType_Ready(&PyEncoderType) < 0)
2324
m = Py_InitModule3("_speedups", speedups_methods, module_doc);
2325
Py_INCREF((PyObject*)&PyScannerType);
2326
PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2327
Py_INCREF((PyObject*)&PyEncoderType);
2328
PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);