1
"""Implementation of JSONDecoder
7
from simplejson.scanner import make_scanner
9
from simplejson._speedups import scanstring as c_scanstring
13
__all__ = ['JSONDecoder']
15
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
17
def _floatconstants():
18
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
19
if sys.byteorder != 'big':
20
_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
21
nan, inf = struct.unpack('dd', _BYTES)
24
NaN, PosInf, NegInf = _floatconstants()
27
def linecol(doc, pos):
    """Translate a character offset into a ``(lineno, colno)`` pair.

    ``doc`` is the text being scanned and ``pos`` is an index into it.
    Both numbers in the returned tuple are 1-based.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes ``pos``, so the first line
    # needs no special case (rindex() would raise ValueError there).
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
36
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location in ``doc`` it refers to.

    ``pos`` is the character offset of the problem.  When ``end`` is
    given, the message describes the span from ``pos`` to ``end``;
    otherwise it describes the single position ``pos``.  Returns the
    formatted string.
    """
    # Note that this function is called from _speedups
    lineno, colno = linecol(doc, pos)
    if end is None:
        # Single-position form.
        fmt = '%s: line %d column %d (char %d)'
        return fmt % (msg, lineno, colno, pos)
    # Ranged form: also report where the offending span stops.
    endlineno, endcolno = linecol(doc, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
57
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
59
'"': u'"', '\\': u'\\', '/': u'/',
60
'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
63
DEFAULT_ENCODING = "utf-8"
65
def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
66
"""Scan the string s for a JSON string. End is the index of the
67
character in s after the quote that started the JSON string.
68
Unescapes all valid JSON string escape sequences and raises ValueError
69
on attempt to decode an invalid string. If strict is False then literal
70
control characters are allowed in the string.
72
Returns a tuple of the decoded string and the index of the character in s
73
after the end quote."""
75
encoding = DEFAULT_ENCODING
77
_append = chunks.append
83
errmsg("Unterminated string starting at", s, begin))
85
content, terminator = chunk.groups()
86
# Content contains zero or more unescaped string characters
88
if not isinstance(content, unicode):
89
content = unicode(content, encoding)
91
# Terminator is the end of string, a literal control character,
92
# or a backslash denoting that an escape sequence follows
95
elif terminator != '\\':
97
msg = "Invalid control character %r at" % (terminator,)
98
#msg = "Invalid control character {0!r} at".format(terminator)
99
raise ValueError(errmsg(msg, s, end))
107
errmsg("Unterminated string starting at", s, begin))
108
# If not a unicode escape sequence, must be in the lookup table
113
msg = "Invalid \\escape: " + repr(esc)
114
raise ValueError(errmsg(msg, s, end))
117
# Unicode escape sequence
118
esc = s[end + 1:end + 5]
121
msg = "Invalid \\uXXXX escape"
122
raise ValueError(errmsg(msg, s, end))
124
# Check for surrogate pair on UCS-4 systems
125
if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
126
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
127
if not s[end + 5:end + 7] == '\\u':
128
raise ValueError(errmsg(msg, s, end))
129
esc2 = s[end + 7:end + 11]
131
raise ValueError(errmsg(msg, s, end))
133
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
137
# Append the unescaped character
139
return u''.join(chunks), end
142
# Use speedup if available
143
scanstring = c_scanstring or py_scanstring
145
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
146
WHITESPACE_STR = ' \t\n\r'
148
def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
150
# Use a slice to prevent IndexError from being raised, the following
151
# check will raise a more specific ValueError if the string is empty
152
nextchar = s[end:end + 1]
153
# Normally we expect nextchar == '"'
156
end = _w(s, end).end()
157
nextchar = s[end:end + 1]
158
# Trivial empty object
160
return pairs, end + 1
161
elif nextchar != '"':
162
raise ValueError(errmsg("Expecting property name", s, end))
165
key, end = scanstring(s, end, encoding, strict)
167
# To skip some function call overhead we optimize the fast paths where
168
# the JSON key separator is ": " or just ":".
169
if s[end:end + 1] != ':':
170
end = _w(s, end).end()
171
if s[end:end + 1] != ':':
172
raise ValueError(errmsg("Expecting : delimiter", s, end))
180
end = _w(s, end + 1).end()
185
value, end = scan_once(s, end)
186
except StopIteration:
187
raise ValueError(errmsg("Expecting object", s, end))
193
end = _w(s, end + 1).end()
201
elif nextchar != ',':
202
raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
210
end = _w(s, end + 1).end()
217
raise ValueError(errmsg("Expecting property name", s, end - 1))
219
if object_hook is not None:
220
pairs = object_hook(pairs)
223
def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
225
nextchar = s[end:end + 1]
227
end = _w(s, end + 1).end()
228
nextchar = s[end:end + 1]
229
# Look-ahead for trivial empty array
231
return values, end + 1
232
_append = values.append
235
value, end = scan_once(s, end)
236
except StopIteration:
237
raise ValueError(errmsg("Expecting object", s, end))
239
nextchar = s[end:end + 1]
241
end = _w(s, end + 1).end()
242
nextchar = s[end:end + 1]
246
elif nextchar != ',':
247
raise ValueError(errmsg("Expecting , delimiter", s, end))
253
end = _w(s, end + 1).end()
259
class JSONDecoder(object):
260
"""Simple JSON <http://json.org> decoder
262
Performs the following translations in decoding by default:
264
+---------------+-------------------+
266
+===============+===================+
268
+---------------+-------------------+
270
+---------------+-------------------+
272
+---------------+-------------------+
273
| number (int) | int, long |
274
+---------------+-------------------+
275
| number (real) | float |
276
+---------------+-------------------+
278
+---------------+-------------------+
280
+---------------+-------------------+
282
+---------------+-------------------+
284
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
285
their corresponding ``float`` values, which is outside the JSON spec.
289
def __init__(self, encoding=None, object_hook=None, parse_float=None,
290
parse_int=None, parse_constant=None, strict=True):
291
"""``encoding`` determines the encoding used to interpret any ``str``
292
objects decoded by this instance (utf-8 by default). It has no
293
effect when decoding ``unicode`` objects.
295
Note that currently only encodings that are a superset of ASCII work,
296
strings of other encodings should be passed in as ``unicode``.
298
``object_hook``, if specified, will be called with the result
299
of every JSON object decoded and its return value will be used in
300
place of the given ``dict``. This can be used to provide custom
301
deserializations (e.g. to support JSON-RPC class hinting).
303
``parse_float``, if specified, will be called with the string
304
of every JSON float to be decoded. By default this is equivalent to
305
float(num_str). This can be used to use another datatype or parser
306
for JSON floats (e.g. decimal.Decimal).
308
``parse_int``, if specified, will be called with the string
309
of every JSON int to be decoded. By default this is equivalent to
310
int(num_str). This can be used to use another datatype or parser
311
for JSON integers (e.g. float).
313
``parse_constant``, if specified, will be called with one of the
314
following strings: -Infinity, Infinity, NaN.
315
This can be used to raise an exception if invalid JSON numbers
319
self.encoding = encoding
320
self.object_hook = object_hook
321
self.parse_float = parse_float or float
322
self.parse_int = parse_int or int
323
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
325
self.parse_object = JSONObject
326
self.parse_array = JSONArray
327
self.parse_string = scanstring
328
self.scan_once = make_scanner(self)
330
def decode(self, s, _w=WHITESPACE.match):
331
"""Return the Python representation of ``s`` (a ``str`` or ``unicode``
332
instance containing a JSON document)
335
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
336
end = _w(s, end).end()
338
raise ValueError(errmsg("Extra data", s, end, len(s)))
341
def raw_decode(self, s, idx=0):
342
"""Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
343
with a JSON document) and return a 2-tuple of the Python
344
representation and the index in ``s`` where the document ended.
346
This can be used to decode a JSON document from a string that may
347
have extraneous data at the end.
351
obj, end = self.scan_once(s, idx)
352
except StopIteration:
353
raise ValueError("No JSON object could be decoded")