2
#include "structmember.h"
3
#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double)
4
/* Alias PyOS_string_to_double to the local json_PyOS_string_to_double
   fallback. Only active when the surrounding #if finds
   PY_VERSION_HEX < 0x02070000 and no existing PyOS_string_to_double macro. */
#define PyOS_string_to_double json_PyOS_string_to_double
6
json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
8
json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) {
10
assert(endptr == NULL);
11
assert(overflow_exception == NULL);
12
PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
13
x = PyOS_ascii_atof(s);
18
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
19
/* Fallback Py_TYPE: casts ob to PyObject* and reads its ob_type field.
   Defined only when PY_VERSION_HEX < 0x02060000 and Py_TYPE is not
   already a macro (see the enclosing #if). */
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
21
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
22
/* Fallback Py_ssize_t for the PY_VERSION_HEX < 0x02050000 branch: a plain int. */
typedef int Py_ssize_t;
23
/* Upper bound of the int-based fallback Py_ssize_t. */
#define PY_SSIZE_T_MAX INT_MAX
24
/* Lower bound of the int-based fallback Py_ssize_t. */
#define PY_SSIZE_T_MIN INT_MIN
25
/* In this fallback branch PyInt_FromSsize_t is mapped onto PyInt_FromLong. */
#define PyInt_FromSsize_t PyInt_FromLong
26
/* In this fallback branch PyInt_AsSsize_t is mapped onto PyInt_AsLong. */
#define PyInt_AsSsize_t PyInt_AsLong
29
/* Nonzero when X is neither infinite nor NaN, built from the
   Py_IS_INFINITY and Py_IS_NAN predicates. */
#define Py_IS_FINITE(X) (!(Py_IS_INFINITY(X) || Py_IS_NAN(X)))
33
/* Marks a parameter as intentionally unused (GCC-style attribute). This file
   applies it to the ignored `self` argument of py_scanstring and
   py_encode_basestring_ascii. */
#define UNUSED __attribute__((__unused__))
38
/* Encoding name py_scanstring falls back to when its encoding argument is
   left NULL. */
#define DEFAULT_ENCODING "utf-8"
40
/* Nonzero when op's type is PyScannerType or a subtype of it
   (delegates to PyObject_TypeCheck). */
#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
41
/* Nonzero only when op's type is exactly PyScannerType; subtypes do not match. */
#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
42
/* Nonzero when op's type is PyEncoderType or a subtype of it
   (delegates to PyObject_TypeCheck). */
#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
43
/* Nonzero only when op's type is exactly PyEncoderType; subtypes do not match. */
#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
44
/* Nonzero when op's type is the type DecimalTypePtr points to, or a subtype
   of it. DecimalTypePtr is a file-level pointer and must be set before use. */
#define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr))
46
static PyTypeObject PyScannerType;
47
static PyTypeObject PyEncoderType;
48
static PyTypeObject *DecimalTypePtr;
50
typedef struct _PyScannerObject {
54
PyObject *object_hook;
56
PyObject *parse_float;
58
PyObject *parse_constant;
62
static PyMemberDef scanner_members[] = {
63
{"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
64
{"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
65
{"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
66
{"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
67
{"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
68
{"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
69
{"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
73
typedef struct _PyEncoderObject {
79
PyObject *key_separator;
80
PyObject *item_separator;
89
static PyMemberDef encoder_members[] = {
90
{"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
91
{"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
92
{"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
93
{"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
94
{"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
95
{"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
96
{"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
97
{"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
98
{"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
103
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
105
ascii_escape_unicode(PyObject *pystr);
107
ascii_escape_str(PyObject *pystr);
109
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
110
void init_speedups(void);
112
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
114
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
116
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
118
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
120
scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
122
scanner_dealloc(PyObject *self);
124
scanner_clear(PyObject *self);
126
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
128
encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
130
encoder_dealloc(PyObject *self);
132
encoder_clear(PyObject *self);
134
encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
136
encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
138
encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
140
_encoded_const(PyObject *obj);
142
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
144
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
146
_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
148
_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
150
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
152
/* Nonzero when c lies in the printable ASCII range ' '..'~' and is neither a
   backslash nor a double quote.
   The argument is parenthesized at every use so that expression arguments
   (e.g. `x | flag`, `cond ? a : b`) keep their meaning next to the
   comparison operators. c is evaluated up to four times, so do not pass an
   expression with side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
153
/* Nonzero when c is a space, tab, line feed or carriage return.
   c is evaluated up to four times, left to right. */
#define IS_WHITESPACE(c) \
    ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
155
/* Output bytes budgeted per escaped input character: one "\uXXXX" escape
   sequence is 6 bytes long. */
#define MIN_EXPANSION 6
156
#ifdef Py_UNICODE_WIDE
157
/* Py_UNICODE_WIDE builds: one input character may be written as a surrogate
   pair, i.e. two "\uXXXX" escapes. */
#define MAX_EXPANSION (2 * MIN_EXPANSION)
159
/* Counterpart of the Py_UNICODE_WIDE definition: at most one escape per input
   character. NOTE(review): the #else/#endif that separate the two
   definitions are not visible in this view -- confirm the guard. */
#define MAX_EXPANSION MIN_EXPANSION
163
_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
165
/* PyObject to Py_ssize_t converter */
166
*size_ptr = PyInt_AsSsize_t(o);
167
if (*size_ptr == -1 && PyErr_Occurred())
173
_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
175
/* Py_ssize_t to PyObject converter */
176
return PyInt_FromSsize_t(*size_ptr);
180
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
182
/* Escape unicode code point c to ASCII escape sequences
183
in char *output. output must have at least 12 bytes unused to
184
accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
185
output[chars++] = '\\';
187
case '\\': output[chars++] = (char)c; break;
188
case '"': output[chars++] = (char)c; break;
189
case '\b': output[chars++] = 'b'; break;
190
case '\f': output[chars++] = 'f'; break;
191
case '\n': output[chars++] = 'n'; break;
192
case '\r': output[chars++] = 'r'; break;
193
case '\t': output[chars++] = 't'; break;
195
#ifdef Py_UNICODE_WIDE
197
/* UTF-16 surrogate pair */
198
Py_UNICODE v = c - 0x10000;
199
c = 0xd800 | ((v >> 10) & 0x3ff);
200
output[chars++] = 'u';
201
output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
202
output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
203
output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
204
output[chars++] = "0123456789abcdef"[(c ) & 0xf];
205
c = 0xdc00 | (v & 0x3ff);
206
output[chars++] = '\\';
209
output[chars++] = 'u';
210
output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
211
output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
212
output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
213
output[chars++] = "0123456789abcdef"[(c ) & 0xf];
219
ascii_escape_unicode(PyObject *pystr)
221
/* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
223
Py_ssize_t input_chars;
224
Py_ssize_t output_size;
225
Py_ssize_t max_output_size;
229
Py_UNICODE *input_unicode;
231
input_chars = PyUnicode_GET_SIZE(pystr);
232
input_unicode = PyUnicode_AS_UNICODE(pystr);
234
/* One char input can be up to 6 chars output, estimate 4 of these */
235
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
236
max_output_size = 2 + (input_chars * MAX_EXPANSION);
237
rval = PyString_FromStringAndSize(NULL, output_size);
241
output = PyString_AS_STRING(rval);
243
output[chars++] = '"';
244
for (i = 0; i < input_chars; i++) {
245
Py_UNICODE c = input_unicode[i];
247
output[chars++] = (char)c;
250
chars = ascii_escape_char(c, output, chars);
252
if (output_size - chars < (1 + MAX_EXPANSION)) {
253
/* There's more than four, so let's resize by a lot */
254
Py_ssize_t new_output_size = output_size * 2;
255
/* This is an upper bound */
256
if (new_output_size > max_output_size) {
257
new_output_size = max_output_size;
259
/* Make sure that the output size changed before resizing */
260
if (new_output_size != output_size) {
261
output_size = new_output_size;
262
if (_PyString_Resize(&rval, output_size) == -1) {
265
output = PyString_AS_STRING(rval);
269
output[chars++] = '"';
270
if (_PyString_Resize(&rval, chars) == -1) {
277
ascii_escape_str(PyObject *pystr)
279
/* Take a PyString pystr and return a new ASCII-only escaped PyString */
281
Py_ssize_t input_chars;
282
Py_ssize_t output_size;
288
input_chars = PyString_GET_SIZE(pystr);
289
input_str = PyString_AS_STRING(pystr);
291
/* Fast path for a string that's already ASCII */
292
for (i = 0; i < input_chars; i++) {
293
Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
295
/* If we have to escape something, scan the string for unicode */
297
for (j = i; j < input_chars; j++) {
298
c = (Py_UNICODE)(unsigned char)input_str[j];
300
/* We hit a non-ASCII character, bail to unicode mode */
302
uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
306
rval = ascii_escape_unicode(uni);
315
if (i == input_chars) {
316
/* Input is already ASCII */
317
output_size = 2 + input_chars;
320
/* One char input can be up to 6 chars output, estimate 4 of these */
321
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
323
rval = PyString_FromStringAndSize(NULL, output_size);
327
output = PyString_AS_STRING(rval);
330
/* We know that everything up to i is ASCII already */
332
memcpy(&output[1], input_str, i);
334
for (; i < input_chars; i++) {
335
Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
337
output[chars++] = (char)c;
340
chars = ascii_escape_char(c, output, chars);
342
/* An ASCII char can't possibly expand to a surrogate! */
343
if (output_size - chars < (1 + MIN_EXPANSION)) {
344
/* There's more than four, so let's resize by a lot */
346
if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
347
output_size = 2 + (input_chars * MIN_EXPANSION);
349
if (_PyString_Resize(&rval, output_size) == -1) {
352
output = PyString_AS_STRING(rval);
355
output[chars++] = '"';
356
if (_PyString_Resize(&rval, chars) == -1) {
363
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
365
/* Use the Python function simplejson.decoder.errmsg to raise a nice
366
looking ValueError exception */
367
static PyObject *JSONDecodeError = NULL;
369
if (JSONDecodeError == NULL) {
370
PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
373
JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
375
if (JSONDecodeError == NULL)
378
exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
380
PyErr_SetObject(JSONDecodeError, exc);
386
join_list_unicode(PyObject *lst)
388
/* return u''.join(lst) */
389
static PyObject *joinfn = NULL;
390
if (joinfn == NULL) {
391
PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
395
joinfn = PyObject_GetAttrString(ustr, "join");
400
return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
404
join_list_string(PyObject *lst)
406
/* return ''.join(lst) */
407
static PyObject *joinfn = NULL;
408
if (joinfn == NULL) {
409
PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
413
joinfn = PyObject_GetAttrString(ustr, "join");
418
return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
422
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
423
/* return (rval, idx) tuple, stealing reference to rval */
427
steal a reference to rval, returns (rval, idx)
432
pyidx = PyInt_FromSsize_t(idx);
437
tpl = PyTuple_New(2);
443
PyTuple_SET_ITEM(tpl, 0, rval);
444
PyTuple_SET_ITEM(tpl, 1, pyidx);
448
#define APPEND_OLD_CHUNK \
449
if (chunk != NULL) { \
450
if (chunks == NULL) { \
451
chunks = PyList_New(0); \
452
if (chunks == NULL) { \
456
if (PyList_Append(chunks, chunk)) { \
463
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
465
/* Read the JSON string from PyString pystr.
466
end is the index of the first character after the quote.
467
encoding is the encoding of pystr (must be an ASCII superset)
468
if strict is zero then literal control characters are allowed
469
*next_end_ptr is a return-by-reference index of the character
472
Return value is a new PyString (if ASCII-only) or PyUnicode
475
Py_ssize_t len = PyString_GET_SIZE(pystr);
476
Py_ssize_t begin = end - 1;
477
Py_ssize_t next = begin;
479
char *buf = PyString_AS_STRING(pystr);
480
PyObject *chunks = NULL;
481
PyObject *chunk = NULL;
483
if (end < 0 || len <= end) {
484
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
488
/* Find the end of the string or the next escape */
490
for (next = end; next < len; next++) {
491
c = (unsigned char)buf[next];
492
if (c == '"' || c == '\\') {
495
else if (strict && c <= 0x1f) {
496
raise_errmsg("Invalid control character at", pystr, next);
503
if (!(c == '"' || c == '\\')) {
504
raise_errmsg("Unterminated string starting at", pystr, begin);
507
/* Pick up this chunk if it's not zero length */
511
strchunk = PyString_FromStringAndSize(&buf[end], next - end);
512
if (strchunk == NULL) {
516
chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
532
raise_errmsg("Unterminated string starting at", pystr, begin);
537
/* Non-unicode backslash escapes */
543
case 'b': c = '\b'; break;
544
case 'f': c = '\f'; break;
545
case 'n': c = '\n'; break;
546
case 'r': c = '\r'; break;
547
case 't': c = '\t'; break;
551
raise_errmsg("Invalid \\escape", pystr, end - 2);
560
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
563
/* Decode 4 hex digits */
564
for (; next < end; next++) {
565
Py_UNICODE digit = buf[next];
568
case '0': case '1': case '2': case '3': case '4':
569
case '5': case '6': case '7': case '8': case '9':
570
c |= (digit - '0'); break;
571
case 'a': case 'b': case 'c': case 'd': case 'e':
573
c |= (digit - 'a' + 10); break;
574
case 'A': case 'B': case 'C': case 'D': case 'E':
576
c |= (digit - 'A' + 10); break;
578
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
582
#ifdef Py_UNICODE_WIDE
584
if ((c & 0xfc00) == 0xd800) {
586
if (end + 6 >= len) {
587
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
590
if (buf[next++] != '\\' || buf[next++] != 'u') {
591
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
595
/* Decode 4 hex digits */
596
for (; next < end; next++) {
598
Py_UNICODE digit = buf[next];
600
case '0': case '1': case '2': case '3': case '4':
601
case '5': case '6': case '7': case '8': case '9':
602
c2 |= (digit - '0'); break;
603
case 'a': case 'b': case 'c': case 'd': case 'e':
605
c2 |= (digit - 'a' + 10); break;
606
case 'A': case 'B': case 'C': case 'D': case 'E':
608
c2 |= (digit - 'A' + 10); break;
610
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
614
if ((c2 & 0xfc00) != 0xdc00) {
615
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
618
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
620
else if ((c & 0xfc00) == 0xdc00) {
621
raise_errmsg("Unpaired low surrogate", pystr, end - 5);
631
chunk = PyUnicode_FromUnicode(&c, 1);
637
char c_char = Py_CHARMASK(c);
638
chunk = PyString_FromStringAndSize(&c_char, 1);
645
if (chunks == NULL) {
649
rval = PyString_FromStringAndSize("", 0);
653
rval = join_list_string(chunks);
671
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
673
/* Read the JSON string from PyUnicode pystr.
674
end is the index of the first character after the quote.
675
if strict is zero then literal control characters are allowed
676
*next_end_ptr is a return-by-reference index of the character
679
Return value is a new PyUnicode
682
Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
683
Py_ssize_t begin = end - 1;
684
Py_ssize_t next = begin;
685
const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
686
PyObject *chunks = NULL;
687
PyObject *chunk = NULL;
689
if (end < 0 || len <= end) {
690
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
694
/* Find the end of the string or the next escape */
696
for (next = end; next < len; next++) {
698
if (c == '"' || c == '\\') {
701
else if (strict && c <= 0x1f) {
702
raise_errmsg("Invalid control character at", pystr, next);
706
if (!(c == '"' || c == '\\')) {
707
raise_errmsg("Unterminated string starting at", pystr, begin);
710
/* Pick up this chunk if it's not zero length */
713
chunk = PyUnicode_FromUnicode(&buf[end], next - end);
724
raise_errmsg("Unterminated string starting at", pystr, begin);
729
/* Non-unicode backslash escapes */
735
case 'b': c = '\b'; break;
736
case 'f': c = '\f'; break;
737
case 'n': c = '\n'; break;
738
case 'r': c = '\r'; break;
739
case 't': c = '\t'; break;
743
raise_errmsg("Invalid \\escape", pystr, end - 2);
752
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
755
/* Decode 4 hex digits */
756
for (; next < end; next++) {
757
Py_UNICODE digit = buf[next];
760
case '0': case '1': case '2': case '3': case '4':
761
case '5': case '6': case '7': case '8': case '9':
762
c |= (digit - '0'); break;
763
case 'a': case 'b': case 'c': case 'd': case 'e':
765
c |= (digit - 'a' + 10); break;
766
case 'A': case 'B': case 'C': case 'D': case 'E':
768
c |= (digit - 'A' + 10); break;
770
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
774
#ifdef Py_UNICODE_WIDE
776
if ((c & 0xfc00) == 0xd800) {
778
if (end + 6 >= len) {
779
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
782
if (buf[next++] != '\\' || buf[next++] != 'u') {
783
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
787
/* Decode 4 hex digits */
788
for (; next < end; next++) {
790
Py_UNICODE digit = buf[next];
792
case '0': case '1': case '2': case '3': case '4':
793
case '5': case '6': case '7': case '8': case '9':
794
c2 |= (digit - '0'); break;
795
case 'a': case 'b': case 'c': case 'd': case 'e':
797
c2 |= (digit - 'a' + 10); break;
798
case 'A': case 'B': case 'C': case 'D': case 'E':
800
c2 |= (digit - 'A' + 10); break;
802
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
806
if ((c2 & 0xfc00) != 0xdc00) {
807
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
810
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
812
else if ((c & 0xfc00) == 0xdc00) {
813
raise_errmsg("Unpaired low surrogate", pystr, end - 5);
819
chunk = PyUnicode_FromUnicode(&c, 1);
825
if (chunks == NULL) {
829
rval = PyUnicode_FromUnicode(NULL, 0);
833
rval = join_list_unicode(chunks);
848
PyDoc_STRVAR(pydoc_scanstring,
849
"scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
851
"Scan the string s for a JSON string. End is the index of the\n"
852
"character in s after the quote that started the JSON string.\n"
853
"Unescapes all valid JSON string escape sequences and raises ValueError\n"
854
"on attempt to decode an invalid string. If strict is False then literal\n"
855
"control characters are allowed in the string.\n"
857
"Returns a tuple of the decoded string and the index of the character in s\n"
858
"after the end quote."
862
py_scanstring(PyObject* self UNUSED, PyObject *args)
867
Py_ssize_t next_end = -1;
868
char *encoding = NULL;
870
if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
873
if (encoding == NULL) {
874
encoding = DEFAULT_ENCODING;
876
if (PyString_Check(pystr)) {
877
rval = scanstring_str(pystr, end, encoding, strict, &next_end);
879
else if (PyUnicode_Check(pystr)) {
880
rval = scanstring_unicode(pystr, end, strict, &next_end);
883
PyErr_Format(PyExc_TypeError,
884
"first argument must be a string, not %.80s",
885
Py_TYPE(pystr)->tp_name);
888
return _build_rval_index_tuple(rval, next_end);
891
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
892
"encode_basestring_ascii(basestring) -> str\n"
894
"Return an ASCII-only JSON representation of a Python string"
898
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
900
/* Return an ASCII-only JSON representation of a Python string */
902
if (PyString_Check(pystr)) {
903
return ascii_escape_str(pystr);
905
else if (PyUnicode_Check(pystr)) {
906
return ascii_escape_unicode(pystr);
909
PyErr_Format(PyExc_TypeError,
910
"first argument must be a string, not %.80s",
911
Py_TYPE(pystr)->tp_name);
917
scanner_dealloc(PyObject *self)
919
/* Deallocate scanner object */
921
Py_TYPE(self)->tp_free(self);
925
scanner_traverse(PyObject *self, visitproc visit, void *arg)
928
assert(PyScanner_Check(self));
929
s = (PyScannerObject *)self;
930
Py_VISIT(s->encoding);
932
Py_VISIT(s->object_hook);
933
Py_VISIT(s->pairs_hook);
934
Py_VISIT(s->parse_float);
935
Py_VISIT(s->parse_int);
936
Py_VISIT(s->parse_constant);
942
scanner_clear(PyObject *self)
945
assert(PyScanner_Check(self));
946
s = (PyScannerObject *)self;
947
Py_CLEAR(s->encoding);
949
Py_CLEAR(s->object_hook);
950
Py_CLEAR(s->pairs_hook);
951
Py_CLEAR(s->parse_float);
952
Py_CLEAR(s->parse_int);
953
Py_CLEAR(s->parse_constant);
959
_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
960
/* Read a JSON object from PyString pystr.
961
idx is the index of the first character after the opening curly brace.
962
*next_idx_ptr is a return-by-reference index to the first character after
963
the closing curly brace.
965
Returns a new PyObject (usually a dict, but object_hook or
966
object_pairs_hook can change that)
968
char *str = PyString_AS_STRING(pystr);
969
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
970
PyObject *rval = NULL;
971
PyObject *pairs = NULL;
973
PyObject *key = NULL;
974
PyObject *val = NULL;
975
char *encoding = PyString_AS_STRING(s->encoding);
976
int strict = PyObject_IsTrue(s->strict);
977
int has_pairs_hook = (s->pairs_hook != Py_None);
979
if (has_pairs_hook) {
980
pairs = PyList_New(0);
990
/* skip whitespace after { */
991
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
993
/* only loop if the object is non-empty */
994
if (idx <= end_idx && str[idx] != '}') {
995
while (idx <= end_idx) {
999
if (str[idx] != '"') {
1000
raise_errmsg("Expecting property name", pystr, idx);
1003
key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
1006
memokey = PyDict_GetItem(s->memo, key);
1007
if (memokey != NULL) {
1013
if (PyDict_SetItem(s->memo, key, key) < 0)
1018
/* skip whitespace between key and : delimiter, read :, skip whitespace */
1019
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1020
if (idx > end_idx || str[idx] != ':') {
1021
raise_errmsg("Expecting : delimiter", pystr, idx);
1025
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1027
/* read any JSON data type */
1028
val = scan_once_str(s, pystr, idx, &next_idx);
1032
if (has_pairs_hook) {
1033
item = PyTuple_Pack(2, key, val);
1038
if (PyList_Append(pairs, item) == -1) {
1045
if (PyDict_SetItem(rval, key, val) < 0)
1052
/* skip whitespace before } or , */
1053
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1055
/* bail if the object is closed or we didn't get the , delimiter */
1056
if (idx > end_idx) break;
1057
if (str[idx] == '}') {
1060
else if (str[idx] != ',') {
1061
raise_errmsg("Expecting , delimiter", pystr, idx);
1066
/* skip whitespace after , delimiter */
1067
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1070
/* verify that idx <= end_idx, str[idx] should be '}' */
1071
if (idx > end_idx || str[idx] != '}') {
1072
raise_errmsg("Expecting object", pystr, end_idx);
1076
/* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1077
if (s->pairs_hook != Py_None) {
1078
val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1082
*next_idx_ptr = idx + 1;
1086
/* if object_hook is not None: rval = object_hook(rval) */
1087
if (s->object_hook != Py_None) {
1088
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1095
*next_idx_ptr = idx + 1;
1106
_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1107
/* Read a JSON object from PyUnicode pystr.
1108
idx is the index of the first character after the opening curly brace.
1109
*next_idx_ptr is a return-by-reference index to the first character after
1110
the closing curly brace.
1112
Returns a new PyObject (usually a dict, but object_hook or object_pairs_hook can change that)
1114
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1115
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1116
PyObject *rval = NULL;
1117
PyObject *pairs = NULL;
1119
PyObject *key = NULL;
1120
PyObject *val = NULL;
1121
int strict = PyObject_IsTrue(s->strict);
1122
int has_pairs_hook = (s->pairs_hook != Py_None);
1123
Py_ssize_t next_idx;
1125
if (has_pairs_hook) {
1126
pairs = PyList_New(0);
1131
rval = PyDict_New();
1136
/* skip whitespace after { */
1137
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1139
/* only loop if the object is non-empty */
1140
if (idx <= end_idx && str[idx] != '}') {
1141
while (idx <= end_idx) {
1145
if (str[idx] != '"') {
1146
raise_errmsg("Expecting property name", pystr, idx);
1149
key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1152
memokey = PyDict_GetItem(s->memo, key);
1153
if (memokey != NULL) {
1159
if (PyDict_SetItem(s->memo, key, key) < 0)
1164
/* skip whitespace between key and : delimiter, read :, skip whitespace */
1165
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1166
if (idx > end_idx || str[idx] != ':') {
1167
raise_errmsg("Expecting : delimiter", pystr, idx);
1171
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1173
/* read any JSON term */
1174
val = scan_once_unicode(s, pystr, idx, &next_idx);
1178
if (has_pairs_hook) {
1179
item = PyTuple_Pack(2, key, val);
1184
if (PyList_Append(pairs, item) == -1) {
1191
if (PyDict_SetItem(rval, key, val) < 0)
1198
/* skip whitespace before } or , */
1199
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1201
/* bail if the object is closed or we didn't get the , delimiter */
1202
if (idx > end_idx) break;
1203
if (str[idx] == '}') {
1206
else if (str[idx] != ',') {
1207
raise_errmsg("Expecting , delimiter", pystr, idx);
1212
/* skip whitespace after , delimiter */
1213
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1217
/* verify that idx <= end_idx, str[idx] should be '}' */
1218
if (idx > end_idx || str[idx] != '}') {
1219
raise_errmsg("Expecting object", pystr, end_idx);
1223
/* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1224
if (s->pairs_hook != Py_None) {
1225
val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1229
*next_idx_ptr = idx + 1;
1233
/* if object_hook is not None: rval = object_hook(rval) */
1234
if (s->object_hook != Py_None) {
1235
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1242
*next_idx_ptr = idx + 1;
1253
_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1254
/* Read a JSON array from PyString pystr.
1255
idx is the index of the first character after the opening bracket.
1256
*next_idx_ptr is a return-by-reference index to the first character after
1259
Returns a new PyList
1261
char *str = PyString_AS_STRING(pystr);
1262
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1263
PyObject *val = NULL;
1264
PyObject *rval = PyList_New(0);
1265
Py_ssize_t next_idx;
1269
/* skip whitespace after [ */
1270
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1272
/* only loop if the array is non-empty */
1273
if (idx <= end_idx && str[idx] != ']') {
1274
while (idx <= end_idx) {
1276
/* read any JSON term; the index after it comes back through next_idx */
1277
val = scan_once_str(s, pystr, idx, &next_idx);
1279
if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1281
raise_errmsg("Expecting object", pystr, idx);
1286
if (PyList_Append(rval, val) == -1)
1292
/* skip whitespace between term and , */
1293
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1295
/* bail if the array is closed or we didn't get the , delimiter */
1296
if (idx > end_idx) break;
1297
if (str[idx] == ']') {
1300
else if (str[idx] != ',') {
1301
raise_errmsg("Expecting , delimiter", pystr, idx);
1306
/* skip whitespace after , */
1307
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1311
/* verify that idx <= end_idx, str[idx] should be ']' */
1312
if (idx > end_idx || str[idx] != ']') {
1313
raise_errmsg("Expecting object", pystr, end_idx);
1316
*next_idx_ptr = idx + 1;
1325
_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1326
/* Read a JSON array from PyUnicode pystr.
1327
idx is the index of the first character after the opening bracket.
1328
*next_idx_ptr is a return-by-reference index to the first character after
1331
Returns a new PyList
1333
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1334
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1335
PyObject *val = NULL;
1336
PyObject *rval = PyList_New(0);
1337
Py_ssize_t next_idx;
1341
/* skip whitespace after [ */
1342
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1344
/* only loop if the array is non-empty */
1345
if (idx <= end_idx && str[idx] != ']') {
1346
while (idx <= end_idx) {
1348
/* read any JSON term */
1349
val = scan_once_unicode(s, pystr, idx, &next_idx);
1351
if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1353
raise_errmsg("Expecting object", pystr, idx);
1358
if (PyList_Append(rval, val) == -1)
1364
/* skip whitespace between term and , */
1365
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1367
/* bail if the array is closed or we didn't get the , delimiter */
1368
if (idx > end_idx) break;
1369
if (str[idx] == ']') {
1372
else if (str[idx] != ',') {
1373
raise_errmsg("Expecting , delimiter", pystr, idx);
1378
/* skip whitespace after , */
1379
while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1383
/* verify that idx <= end_idx, str[idx] should be ']' */
1384
if (idx > end_idx || str[idx] != ']') {
1385
raise_errmsg("Expecting object", pystr, end_idx);
1388
*next_idx_ptr = idx + 1;
1397
_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1398
/* Read a JSON constant from PyString pystr.
1399
constant is the constant string that was found
1400
("NaN", "Infinity", "-Infinity").
1401
idx is the index of the first character of the constant
1402
*next_idx_ptr is a return-by-reference index to the first character after
1405
Returns the result of parse_constant
1409
/* constant is "NaN", "Infinity", or "-Infinity" */
1410
cstr = PyString_InternFromString(constant);
1414
/* rval = parse_constant(constant) */
1415
rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1416
idx += PyString_GET_SIZE(cstr);
1418
*next_idx_ptr = idx;
1423
_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1424
/* Read a JSON number from PyString pystr.
1425
start is the index of the first character of the number
1426
*next_idx_ptr is a return-by-reference index to the first character after
1429
Returns a new PyObject representation of that number:
1430
PyInt, PyLong, or PyFloat.
1431
May return other types if parse_int or parse_float are set
1433
char *str = PyString_AS_STRING(pystr);
1434
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1435
Py_ssize_t idx = start;
1440
/* read a sign if it's there, make sure it's not the end of the string */
1441
if (str[idx] == '-') {
1443
if (idx > end_idx) {
1444
PyErr_SetNone(PyExc_StopIteration);
1449
/* read as many integer digits as we find as long as it doesn't start with 0 */
1450
if (str[idx] >= '1' && str[idx] <= '9') {
1452
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1454
/* if it starts with 0 we only expect one integer digit */
1455
else if (str[idx] == '0') {
1458
/* no integer digits, error */
1460
PyErr_SetNone(PyExc_StopIteration);
1464
/* if the next char is '.' followed by a digit then read all float digits */
1465
if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1468
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1471
/* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1472
if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1474
/* save the index of the 'e' or 'E' just in case we need to backtrack */
1475
Py_ssize_t e_start = idx;
1478
/* read an exponent sign if present */
1479
if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1481
/* read all digits */
1482
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1484
/* if we got a digit, then parse as float. if not, backtrack */
1485
if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1493
/* copy the section we determined to be a number */
1494
numstr = PyString_FromStringAndSize(&str[start], idx - start);
1498
/* parse as a float using a fast path if available, otherwise call user defined method */
1499
if (s->parse_float != (PyObject *)&PyFloat_Type) {
1500
rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1503
/* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
1504
double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1506
if (d == -1.0 && PyErr_Occurred())
1508
rval = PyFloat_FromDouble(d);
1512
/* parse as an int using a fast path if available, otherwise call user defined method */
1513
if (s->parse_int != (PyObject *)&PyInt_Type) {
1514
rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1517
rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1521
*next_idx_ptr = idx;
1526
_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1527
/* Read a JSON number from PyUnicode pystr.
1528
idx is the index of the first character of the number
1529
*next_idx_ptr is a return-by-reference index to the first character after
1532
Returns a new PyObject representation of that number:
1533
PyInt, PyLong, or PyFloat.
1534
May return other types if parse_int or parse_float are set
1536
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1537
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1538
Py_ssize_t idx = start;
1543
/* read a sign if it's there, make sure it's not the end of the string */
1544
if (str[idx] == '-') {
1546
if (idx > end_idx) {
1547
PyErr_SetNone(PyExc_StopIteration);
1552
/* read as many integer digits as we find as long as it doesn't start with 0 */
1553
if (str[idx] >= '1' && str[idx] <= '9') {
1555
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1557
/* if it starts with 0 we only expect one integer digit */
1558
else if (str[idx] == '0') {
1561
/* no integer digits, error */
1563
PyErr_SetNone(PyExc_StopIteration);
1567
/* if the next char is '.' followed by a digit then read all float digits */
1568
if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1571
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1574
/* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1575
if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1576
Py_ssize_t e_start = idx;
1579
/* read an exponent sign if present */
1580
if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1582
/* read all digits */
1583
while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1585
/* if we got a digit, then parse as float. if not, backtrack */
1586
if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1594
/* copy the section we determined to be a number */
1595
numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1599
/* parse as a float using a fast path if available, otherwise call user defined method */
1600
if (s->parse_float != (PyObject *)&PyFloat_Type) {
1601
rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1604
rval = PyFloat_FromString(numstr, NULL);
1608
/* no fast path for unicode -> int, just call */
1609
rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1612
*next_idx_ptr = idx;
1617
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1619
/* Read one JSON term (of any kind) from PyString pystr.
1620
idx is the index of the first character of the term
1621
*next_idx_ptr is a return-by-reference index to the first character after
1624
Returns a new PyObject representation of the term.
1626
char *str = PyString_AS_STRING(pystr);
1627
Py_ssize_t length = PyString_GET_SIZE(pystr);
1628
if (idx >= length) {
1629
PyErr_SetNone(PyExc_StopIteration);
1635
return scanstring_str(pystr, idx + 1,
1636
PyString_AS_STRING(s->encoding),
1637
PyObject_IsTrue(s->strict),
1641
return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1644
return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1647
if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1649
*next_idx_ptr = idx + 4;
1655
if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1657
*next_idx_ptr = idx + 4;
1663
if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1664
Py_INCREF(Py_False);
1665
*next_idx_ptr = idx + 5;
1671
if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1672
return _parse_constant(s, "NaN", idx, next_idx_ptr);
1677
if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1678
return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1683
if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1684
return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1688
/* Didn't find a string, object, array, or named constant. Look for a number. */
1689
return _match_number_str(s, pystr, idx, next_idx_ptr);
1693
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1695
/* Read one JSON term (of any kind) from PyUnicode pystr.
1696
idx is the index of the first character of the term
1697
*next_idx_ptr is a return-by-reference index to the first character after
1700
Returns a new PyObject representation of the term.
1702
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1703
Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1704
if (idx >= length) {
1705
PyErr_SetNone(PyExc_StopIteration);
1711
return scanstring_unicode(pystr, idx + 1,
1712
PyObject_IsTrue(s->strict),
1716
return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1719
return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1722
if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1724
*next_idx_ptr = idx + 4;
1730
if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1732
*next_idx_ptr = idx + 4;
1738
if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1739
Py_INCREF(Py_False);
1740
*next_idx_ptr = idx + 5;
1746
if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1747
return _parse_constant(s, "NaN", idx, next_idx_ptr);
1752
if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1753
return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1758
if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1759
return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1763
/* Didn't find a string, object, array, or named constant. Look for a number. */
1764
return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1768
scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1770
/* Python callable interface to scan_once_{str,unicode} */
1774
Py_ssize_t next_idx = -1;
1775
static char *kwlist[] = {"string", "idx", NULL};
1777
assert(PyScanner_Check(self));
1778
s = (PyScannerObject *)self;
1779
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1782
if (PyString_Check(pystr)) {
1783
rval = scan_once_str(s, pystr, idx, &next_idx);
1785
else if (PyUnicode_Check(pystr)) {
1786
rval = scan_once_unicode(s, pystr, idx, &next_idx);
1789
PyErr_Format(PyExc_TypeError,
1790
"first argument must be a string, not %.80s",
1791
Py_TYPE(pystr)->tp_name);
1794
PyDict_Clear(s->memo);
1795
return _build_rval_index_tuple(rval, next_idx);
1799
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1802
s = (PyScannerObject *)type->tp_alloc(type, 0);
1806
s->object_hook = NULL;
1807
s->pairs_hook = NULL;
1808
s->parse_float = NULL;
1809
s->parse_int = NULL;
1810
s->parse_constant = NULL;
1812
return (PyObject *)s;
1816
scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1818
/* Initialize Scanner object */
1820
static char *kwlist[] = {"context", NULL};
1823
assert(PyScanner_Check(self));
1824
s = (PyScannerObject *)self;
1826
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1829
if (s->memo == NULL) {
1830
s->memo = PyDict_New();
1831
if (s->memo == NULL)
1835
/* PyString_AS_STRING is used on encoding */
1836
s->encoding = PyObject_GetAttrString(ctx, "encoding");
1837
if (s->encoding == NULL)
1839
if (s->encoding == Py_None) {
1841
s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1843
else if (PyUnicode_Check(s->encoding)) {
1844
PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1845
Py_DECREF(s->encoding);
1848
if (s->encoding == NULL || !PyString_Check(s->encoding))
1851
/* All of these will fail "gracefully" so we don't need to verify them */
1852
s->strict = PyObject_GetAttrString(ctx, "strict");
1853
if (s->strict == NULL)
1855
s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1856
if (s->object_hook == NULL)
1858
s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1859
if (s->pairs_hook == NULL)
1861
s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1862
if (s->parse_float == NULL)
1864
s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1865
if (s->parse_int == NULL)
1867
s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1868
if (s->parse_constant == NULL)
1874
Py_CLEAR(s->encoding);
1875
Py_CLEAR(s->strict);
1876
Py_CLEAR(s->object_hook);
1877
Py_CLEAR(s->pairs_hook);
1878
Py_CLEAR(s->parse_float);
1879
Py_CLEAR(s->parse_int);
1880
Py_CLEAR(s->parse_constant);
1884
PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1887
PyTypeObject PyScannerType = {
1888
PyObject_HEAD_INIT(NULL)
1889
0, /* tp_internal */
1890
"simplejson._speedups.Scanner", /* tp_name */
1891
sizeof(PyScannerObject), /* tp_basicsize */
1892
0, /* tp_itemsize */
1893
scanner_dealloc, /* tp_dealloc */
1899
0, /* tp_as_number */
1900
0, /* tp_as_sequence */
1901
0, /* tp_as_mapping */
1903
scanner_call, /* tp_call */
1905
0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1906
0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1907
0, /* tp_as_buffer */
1908
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1909
scanner_doc, /* tp_doc */
1910
scanner_traverse, /* tp_traverse */
1911
scanner_clear, /* tp_clear */
1912
0, /* tp_richcompare */
1913
0, /* tp_weaklistoffset */
1915
0, /* tp_iternext */
1917
scanner_members, /* tp_members */
1921
0, /* tp_descr_get */
1922
0, /* tp_descr_set */
1923
0, /* tp_dictoffset */
1924
scanner_init, /* tp_init */
1925
0,/* PyType_GenericAlloc, */ /* tp_alloc */
1926
scanner_new, /* tp_new */
1927
0,/* PyObject_GC_Del, */ /* tp_free */
1931
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1934
s = (PyEncoderObject *)type->tp_alloc(type, 0);
1937
s->defaultfn = NULL;
1940
s->key_separator = NULL;
1941
s->item_separator = NULL;
1942
s->sort_keys = NULL;
1946
return (PyObject *)s;
1950
encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1952
/* initialize Encoder object */
1953
static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", NULL};
1956
PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1957
PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal;
1959
assert(PyEncoder_Check(self));
1960
s = (PyEncoderObject *)self;
1962
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOO:make_encoder", kwlist,
1963
&markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1964
&sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal))
1967
s->markers = markers;
1968
s->defaultfn = defaultfn;
1969
s->encoder = encoder;
1971
s->key_separator = key_separator;
1972
s->item_separator = item_separator;
1973
s->sort_keys = sort_keys;
1974
s->skipkeys = skipkeys;
1975
s->key_memo = key_memo;
1976
s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1977
s->allow_nan = PyObject_IsTrue(allow_nan);
1978
s->use_decimal = PyObject_IsTrue(use_decimal);
1980
Py_INCREF(s->markers);
1981
Py_INCREF(s->defaultfn);
1982
Py_INCREF(s->encoder);
1983
Py_INCREF(s->indent);
1984
Py_INCREF(s->key_separator);
1985
Py_INCREF(s->item_separator);
1986
Py_INCREF(s->sort_keys);
1987
Py_INCREF(s->skipkeys);
1988
Py_INCREF(s->key_memo);
1993
encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1995
/* Python callable interface to encode_listencode_obj */
1996
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1999
Py_ssize_t indent_level;
2001
assert(PyEncoder_Check(self));
2002
s = (PyEncoderObject *)self;
2003
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
2004
&obj, _convertPyInt_AsSsize_t, &indent_level))
2006
rval = PyList_New(0);
2009
if (encoder_listencode_obj(s, rval, obj, indent_level)) {
2017
_encoded_const(PyObject *obj)
2019
/* Return the JSON string representation of None, True, False */
2020
if (obj == Py_None) {
2021
static PyObject *s_null = NULL;
2022
if (s_null == NULL) {
2023
s_null = PyString_InternFromString("null");
2028
else if (obj == Py_True) {
2029
static PyObject *s_true = NULL;
2030
if (s_true == NULL) {
2031
s_true = PyString_InternFromString("true");
2036
else if (obj == Py_False) {
2037
static PyObject *s_false = NULL;
2038
if (s_false == NULL) {
2039
s_false = PyString_InternFromString("false");
2045
PyErr_SetString(PyExc_ValueError, "not a const");
2051
encoder_encode_float(PyEncoderObject *s, PyObject *obj)
2053
/* Return the JSON representation of a PyFloat */
2054
double i = PyFloat_AS_DOUBLE(obj);
2055
if (!Py_IS_FINITE(i)) {
2056
if (!s->allow_nan) {
2057
PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
2061
return PyString_FromString("Infinity");
2064
return PyString_FromString("-Infinity");
2067
return PyString_FromString("NaN");
2070
/* Use a better float format here? */
2071
return PyObject_Repr(obj);
2075
encoder_encode_string(PyEncoderObject *s, PyObject *obj)
2077
/* Return the JSON representation of a string */
2079
return py_encode_basestring_ascii(NULL, obj);
2081
return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
2085
_steal_list_append(PyObject *lst, PyObject *stolen)
2087
/* Append stolen and then decrement its reference count */
2088
int rval = PyList_Append(lst, stolen);
2094
encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
2096
/* Encode Python object obj to a JSON term, rval is a PyList */
2100
if (obj == Py_None || obj == Py_True || obj == Py_False) {
2101
PyObject *cstr = _encoded_const(obj);
2104
return _steal_list_append(rval, cstr);
2106
else if (PyString_Check(obj) || PyUnicode_Check(obj))
2108
PyObject *encoded = encoder_encode_string(s, obj);
2109
if (encoded == NULL)
2111
return _steal_list_append(rval, encoded);
2113
else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2114
PyObject *encoded = PyObject_Str(obj);
2115
if (encoded == NULL)
2117
return _steal_list_append(rval, encoded);
2119
else if (PyFloat_Check(obj)) {
2120
PyObject *encoded = encoder_encode_float(s, obj);
2121
if (encoded == NULL)
2123
return _steal_list_append(rval, encoded);
2125
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2126
return encoder_listencode_list(s, rval, obj, indent_level);
2128
else if (PyDict_Check(obj)) {
2129
return encoder_listencode_dict(s, rval, obj, indent_level);
2131
else if (s->use_decimal && Decimal_Check(obj)) {
2132
PyObject *encoded = PyObject_Str(obj);
2133
if (encoded == NULL)
2135
return _steal_list_append(rval, encoded);
2138
PyObject *ident = NULL;
2139
if (s->markers != Py_None) {
2141
ident = PyLong_FromVoidPtr(obj);
2144
has_key = PyDict_Contains(s->markers, ident);
2147
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2151
if (PyDict_SetItem(s->markers, ident, obj)) {
2156
newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2157
if (newobj == NULL) {
2161
rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2167
if (ident != NULL) {
2168
if (PyDict_DelItem(s->markers, ident)) {
2179
encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2181
/* Encode Python dict dct a JSON term, rval is a PyList */
2182
static PyObject *open_dict = NULL;
2183
static PyObject *close_dict = NULL;
2184
static PyObject *empty_dict = NULL;
2185
static PyObject *iteritems = NULL;
2186
PyObject *kstr = NULL;
2187
PyObject *ident = NULL;
2188
PyObject *key, *value;
2189
PyObject *iter = NULL;
2190
PyObject *item = NULL;
2191
PyObject *encoded = NULL;
2195
if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) {
2196
open_dict = PyString_InternFromString("{");
2197
close_dict = PyString_InternFromString("}");
2198
empty_dict = PyString_InternFromString("{}");
2199
iteritems = PyString_InternFromString("iteritems");
2200
if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL)
2203
if (PyDict_Size(dct) == 0)
2204
return PyList_Append(rval, empty_dict);
2206
if (s->markers != Py_None) {
2208
ident = PyLong_FromVoidPtr(dct);
2211
has_key = PyDict_Contains(s->markers, ident);
2214
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2217
if (PyDict_SetItem(s->markers, ident, dct)) {
2222
if (PyList_Append(rval, open_dict))
2225
if (s->indent != Py_None) {
2226
/* TODO: DOES NOT RUN */
2229
newline_indent = '\n' + (_indent * _current_indent_level)
2230
separator = _item_separator + newline_indent
2231
buf += newline_indent
2235
/* TODO: C speedup not implemented for sort_keys */
2237
skipkeys = PyObject_IsTrue(s->skipkeys);
2239
iter = PyObject_CallMethodObjArgs(dct, iteritems, NULL);
2242
while ((item = PyIter_Next(iter))) {
2244
key = PyTuple_GetItem(item, 0);
2247
value = PyTuple_GetItem(item, 1);
2251
encoded = PyDict_GetItem(s->key_memo, key);
2252
if (encoded != NULL) {
2255
else if (PyString_Check(key) || PyUnicode_Check(key)) {
2259
else if (PyFloat_Check(key)) {
2260
kstr = encoder_encode_float(s, key);
2264
else if (PyInt_Check(key) || PyLong_Check(key)) {
2265
kstr = PyObject_Str(key);
2269
else if (key == Py_True || key == Py_False || key == Py_None) {
2270
kstr = _encoded_const(key);
2274
else if (skipkeys) {
2279
/* TODO: include repr of key */
2280
PyErr_SetString(PyExc_ValueError, "keys must be a string");
2285
if (PyList_Append(rval, s->item_separator))
2289
if (encoded == NULL) {
2290
encoded = encoder_encode_string(s, kstr);
2292
if (encoded == NULL)
2294
if (PyDict_SetItem(s->key_memo, key, encoded))
2297
if (PyList_Append(rval, encoded)) {
2301
if (PyList_Append(rval, s->key_separator))
2303
if (encoder_listencode_obj(s, rval, value, indent_level))
2309
if (PyErr_Occurred())
2311
if (ident != NULL) {
2312
if (PyDict_DelItem(s->markers, ident))
2316
if (s->indent != Py_None) {
2317
/* TODO: DOES NOT RUN */
2320
yield '\n' + (_indent * _current_indent_level)
2323
if (PyList_Append(rval, close_dict))
2328
Py_XDECREF(encoded);
2338
encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2340
/* Encode Python list seq to a JSON term, rval is a PyList */
2341
static PyObject *open_array = NULL;
2342
static PyObject *close_array = NULL;
2343
static PyObject *empty_array = NULL;
2344
PyObject *ident = NULL;
2345
PyObject *iter = NULL;
2346
PyObject *obj = NULL;
2350
if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2351
open_array = PyString_InternFromString("[");
2352
close_array = PyString_InternFromString("]");
2353
empty_array = PyString_InternFromString("[]");
2354
if (open_array == NULL || close_array == NULL || empty_array == NULL)
2358
is_true = PyObject_IsTrue(seq);
2361
else if (is_true == 0)
2362
return PyList_Append(rval, empty_array);
2364
if (s->markers != Py_None) {
2366
ident = PyLong_FromVoidPtr(seq);
2369
has_key = PyDict_Contains(s->markers, ident);
2372
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2375
if (PyDict_SetItem(s->markers, ident, seq)) {
2380
iter = PyObject_GetIter(seq);
2384
if (PyList_Append(rval, open_array))
2386
if (s->indent != Py_None) {
2387
/* TODO: DOES NOT RUN */
2390
newline_indent = '\n' + (_indent * _current_indent_level)
2391
separator = _item_separator + newline_indent
2392
buf += newline_indent
2395
while ((obj = PyIter_Next(iter))) {
2397
if (PyList_Append(rval, s->item_separator))
2400
if (encoder_listencode_obj(s, rval, obj, indent_level))
2406
if (PyErr_Occurred())
2408
if (ident != NULL) {
2409
if (PyDict_DelItem(s->markers, ident))
2413
if (s->indent != Py_None) {
2414
/* TODO: DOES NOT RUN */
2417
yield '\n' + (_indent * _current_indent_level)
2420
if (PyList_Append(rval, close_array))
2432
encoder_dealloc(PyObject *self)
2434
/* Deallocate Encoder */
2435
encoder_clear(self);
2436
Py_TYPE(self)->tp_free(self);
2440
encoder_traverse(PyObject *self, visitproc visit, void *arg)
2443
assert(PyEncoder_Check(self));
2444
s = (PyEncoderObject *)self;
2445
Py_VISIT(s->markers);
2446
Py_VISIT(s->defaultfn);
2447
Py_VISIT(s->encoder);
2448
Py_VISIT(s->indent);
2449
Py_VISIT(s->key_separator);
2450
Py_VISIT(s->item_separator);
2451
Py_VISIT(s->sort_keys);
2452
Py_VISIT(s->skipkeys);
2453
Py_VISIT(s->key_memo);
2458
encoder_clear(PyObject *self)
2460
/* Deallocate Encoder */
2462
assert(PyEncoder_Check(self));
2463
s = (PyEncoderObject *)self;
2464
Py_CLEAR(s->markers);
2465
Py_CLEAR(s->defaultfn);
2466
Py_CLEAR(s->encoder);
2467
Py_CLEAR(s->indent);
2468
Py_CLEAR(s->key_separator);
2469
Py_CLEAR(s->item_separator);
2470
Py_CLEAR(s->sort_keys);
2471
Py_CLEAR(s->skipkeys);
2472
Py_CLEAR(s->key_memo);
2476
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2479
PyTypeObject PyEncoderType = {
2480
PyObject_HEAD_INIT(NULL)
2481
0, /* tp_internal */
2482
"simplejson._speedups.Encoder", /* tp_name */
2483
sizeof(PyEncoderObject), /* tp_basicsize */
2484
0, /* tp_itemsize */
2485
encoder_dealloc, /* tp_dealloc */
2491
0, /* tp_as_number */
2492
0, /* tp_as_sequence */
2493
0, /* tp_as_mapping */
2495
encoder_call, /* tp_call */
2497
0, /* tp_getattro */
2498
0, /* tp_setattro */
2499
0, /* tp_as_buffer */
2500
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2501
encoder_doc, /* tp_doc */
2502
encoder_traverse, /* tp_traverse */
2503
encoder_clear, /* tp_clear */
2504
0, /* tp_richcompare */
2505
0, /* tp_weaklistoffset */
2507
0, /* tp_iternext */
2509
encoder_members, /* tp_members */
2513
0, /* tp_descr_get */
2514
0, /* tp_descr_set */
2515
0, /* tp_dictoffset */
2516
encoder_init, /* tp_init */
2518
encoder_new, /* tp_new */
2522
static PyMethodDef speedups_methods[] = {
2523
{"encode_basestring_ascii",
2524
(PyCFunction)py_encode_basestring_ascii,
2526
pydoc_encode_basestring_ascii},
2528
(PyCFunction)py_scanstring,
2531
{NULL, NULL, 0, NULL}
2534
PyDoc_STRVAR(module_doc,
2535
"simplejson speedups\n");
2540
PyObject *m, *decimal;
2541
PyScannerType.tp_new = PyType_GenericNew;
2542
if (PyType_Ready(&PyScannerType) < 0)
2544
PyEncoderType.tp_new = PyType_GenericNew;
2545
if (PyType_Ready(&PyEncoderType) < 0)
2548
decimal = PyImport_ImportModule("decimal");
2549
if (decimal == NULL)
2551
DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal");
2553
if (DecimalTypePtr == NULL)
2556
m = Py_InitModule3("_speedups", speedups_methods, module_doc);
2557
Py_INCREF((PyObject*)&PyScannerType);
2558
PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2559
Py_INCREF((PyObject*)&PyEncoderType);
2560
PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);