1
"""Implementation of JSONDecoder
7
from simplejson.scanner import make_scanner
8
def _import_c_scanstring():
10
from simplejson._speedups import scanstring
14
# Whatever _import_c_scanstring() returned; used further down as the
# preferred ``scanstring`` when it is truthy.
c_scanstring = _import_c_scanstring()
16
# Names exported by ``from <module> import *``.
__all__ = ['JSONDecoder']
18
# Regex flags shared by the patterns compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
20
def _floatconstants():
21
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
22
# The struct module in Python 2.4 would get frexp() out of range here
23
# when an endian is specified in the format string. Fixed in Python 2.5+
24
if sys.byteorder != 'big':
25
_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
26
nan, inf = struct.unpack('dd', _BYTES)
29
# Module-level float constants unpacked from _floatconstants().
NaN, PosInf, NegInf = _floatconstants()
32
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        # The exception text is the formatted message; the raw pieces stay
        # available as attributes for programmatic inspection.
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.end = end
        self.lineno, self.colno = linecol(doc, pos)
        if end is not None:
            # Must be derived from ``end``: using ``pos`` here would make
            # endlineno/endcolno silently duplicate lineno/colno.
            self.endlineno, self.endcolno = linecol(doc, end)
        else:
            self.endlineno, self.endcolno = None, None
58
def linecol(doc, pos):
    """Translate a character offset into a ``(lineno, colno)`` pair.

    Args:
        doc: The document text.
        pos: Offset into ``doc``.

    Returns:
        A tuple ``(lineno, colno)``. ``lineno`` is 1 plus the number of
        newlines before ``pos``; ``colno`` is the distance from the last
        newline before ``pos`` (so the first character of a line is 1).
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() returns -1 when no newline precedes pos, which makes the first
    # line come out 1-based as well; rindex() would raise ValueError there.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
67
def errmsg(msg, doc, pos, end=None):
    """Format a decode error message with line/column information.

    Args:
        msg: The unformatted error message.
        doc: The document being parsed.
        pos: Offset in ``doc`` where the problem starts.
        end: Optional offset where the problem ends.

    Returns:
        ``msg`` followed by the location of ``pos``, or by the
        ``pos``-``end`` range when ``end`` is given.
    """
    # Note that this function is called from _speedups
    lineno, colno = linecol(doc, pos)
    if end is None:
        fmt = '%s: line %d column %d (char %d)'
        return fmt % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
88
# Matches a lazily-consumed run of characters (group 1) followed by the next
# double quote, backslash, or control character \x00-\x1f (group 2).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Lookup table from the character that follows a backslash in a JSON string
# to the character that escape denotes. py_scanstring takes it as its
# default ``_b`` argument.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}
94
# Encoding name that py_scanstring assigns to its ``encoding`` variable.
DEFAULT_ENCODING = "utf-8"
96
# Pure-Python JSON string scanner; the module picks it when c_scanstring is
# falsy (``scanstring = c_scanstring or py_scanstring``).
# NOTE(review): the embedded line numbers in this listing are not contiguous
# (e.g. 109 -> 114, 131 -> 138, 140 -> 145), so the statements that define
# ``chunks``, ``begin``, ``chunk``, ``uni`` and ``uni2`` are not visible
# here -- confirm against the full file before changing any logic.
def py_scanstring(s, end, encoding=None, strict=True,
97
_b=BACKSLASH, _m=STRINGCHUNK.match):
98
"""Scan the string s for a JSON string. End is the index of the
99
character in s after the quote that started the JSON string.
100
Unescapes all valid JSON string escape sequences and raises ValueError
101
on attempt to decode an invalid string. If strict is False then literal
102
control characters are allowed in the string.
104
Returns a tuple of the decoded string and the index of the character in s
105
after the end quote."""
107
# Presumably guarded by an ``encoding is None`` check that is not shown
# in this listing -- TODO confirm.
encoding = DEFAULT_ENCODING
109
# Local alias for the bound append method of ``chunks``.
_append = chunks.append
114
raise JSONDecodeError(
115
"Unterminated string starting at", s, begin)
117
# STRINGCHUNK has two groups: the literal run and the character ending it.
content, terminator = chunk.groups()
118
# Content contains zero or more unescaped string characters
120
if not isinstance(content, unicode):
121
content = unicode(content, encoding)
123
# Terminator is the end of string, a literal control character,
124
# or a backslash denoting that an escape sequence follows
125
if terminator == '"':
127
elif terminator != '\\':
129
msg = "Invalid control character %r at" % (terminator,)
130
#msg = "Invalid control character {0!r} at".format(terminator)
131
raise JSONDecodeError(msg, s, end)
138
raise JSONDecodeError(
139
"Unterminated string starting at", s, begin)
140
# If not a unicode escape sequence, must be in the lookup table
145
msg = "Invalid \\escape: " + repr(esc)
146
raise JSONDecodeError(msg, s, end)
149
# Unicode escape sequence
150
# The four characters after the character at index ``end``.
esc = s[end + 1:end + 5]
153
msg = "Invalid \\uXXXX escape"
154
raise JSONDecodeError(msg, s, end)
156
# Check for surrogate pair on UCS-4 systems
157
if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
158
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
159
# A second ``\u`` must follow immediately after the first escape.
if not s[end + 5:end + 7] == '\\u':
160
raise JSONDecodeError(msg, s, end)
161
esc2 = s[end + 7:end + 11]
163
raise JSONDecodeError(msg, s, end)
165
# Combine ``uni`` (0xd800-based) and ``uni2`` (0xdc00-based) into a
# single code point at or above 0x10000.
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
169
# Append the unescaped character
171
# The joined chunks plus ``end``; per the docstring, ``end`` is the index
# after the closing quote.
return u''.join(chunks), end
174
# Use speedup if available
175
# ``or`` selects py_scanstring whenever c_scanstring is falsy.
scanstring = c_scanstring or py_scanstring
177
# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
178
# The same four whitespace characters as a plain string.
WHITESPACE_STR = ' \t\n\r'
180
# Parses a JSON object from ``s`` starting at index ``end`` and builds
# ``pairs`` as a list of (key, value) tuples; object_pairs_hook / object_hook
# are applied to the result when they are not None.
# The ``(s, end)`` tuple parameter is Python-2-only syntax.
# NOTE(review): the embedded line numbers in this listing are not contiguous
# (e.g. 183 -> 186, 217 -> 225, 247 -> 255), so the loop header, the ``try:``
# line, several returns and the initialisation of ``pairs`` are not visible
# here -- confirm against the full file before changing any logic.
def JSONObject((s, end), encoding, strict, scan_once, object_hook,
181
object_pairs_hook, memo=None,
182
_w=WHITESPACE.match, _ws=WHITESPACE_STR):
183
# Backwards compatibility
186
# dict.setdefault is used below to reuse the first-seen key object.
memo_get = memo.setdefault
188
# Use a slice to prevent IndexError from being raised, the following
189
# check will raise a more specific ValueError if the string is empty
190
nextchar = s[end:end + 1]
191
# Normally we expect nextchar == '"'
194
end = _w(s, end).end()
195
nextchar = s[end:end + 1]
196
# Trivial empty object
198
if object_pairs_hook is not None:
199
result = object_pairs_hook(pairs)
202
if object_hook is not None:
203
pairs = object_hook(pairs)
204
return pairs, end + 1
205
elif nextchar != '"':
206
raise JSONDecodeError("Expecting property name", s, end)
209
key, end = scanstring(s, end, encoding, strict)
210
# setdefault(key, key) returns the object already stored for an equal key,
# so equal keys share one string object.
key = memo_get(key, key)
212
# To skip some function call overhead we optimize the fast paths where
213
# the JSON key separator is ": " or just ":".
214
if s[end:end + 1] != ':':
215
end = _w(s, end).end()
216
if s[end:end + 1] != ':':
217
raise JSONDecodeError("Expecting : delimiter", s, end)
225
end = _w(s, end + 1).end()
230
value, end = scan_once(s, end)
231
# scan_once signals "no value here" with StopIteration.
except StopIteration:
232
raise JSONDecodeError("Expecting object", s, end)
233
pairs.append((key, value))
238
end = _w(s, end + 1).end()
246
elif nextchar != ',':
247
raise JSONDecodeError("Expecting , delimiter", s, end - 1)
255
end = _w(s, end + 1).end()
262
raise JSONDecodeError("Expecting property name", s, end - 1)
264
# object_pairs_hook is consulted before object_hook.
if object_pairs_hook is not None:
265
result = object_pairs_hook(pairs)
268
if object_hook is not None:
269
pairs = object_hook(pairs)
272
# Parses a JSON array from ``s`` starting at index ``end``, collecting the
# values produced by ``scan_once`` into ``values``.
# The ``(s, end)`` tuple parameter is Python-2-only syntax.
# NOTE(review): the embedded line numbers in this listing are not contiguous
# (e.g. 278 -> 280, 281 -> 284, 296 -> 302), so the initialisation of
# ``values``, the loop header, the ``try:`` line and the final return are not
# visible here -- confirm against the full file before changing any logic.
def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
274
# Slice rather than index, so an out-of-range ``end`` yields '' instead of
# raising IndexError.
nextchar = s[end:end + 1]
276
end = _w(s, end + 1).end()
277
nextchar = s[end:end + 1]
278
# Look-ahead for trivial empty array
280
return values, end + 1
281
# Local alias for the bound append method of ``values``.
_append = values.append
284
value, end = scan_once(s, end)
285
# scan_once signals "no value here" with StopIteration.
except StopIteration:
286
raise JSONDecodeError("Expecting object", s, end)
288
nextchar = s[end:end + 1]
290
end = _w(s, end + 1).end()
291
nextchar = s[end:end + 1]
295
elif nextchar != ',':
296
raise JSONDecodeError("Expecting , delimiter", s, end)
302
end = _w(s, end + 1).end()
308
# JSON decoder class: __init__ stores the hooks and parser callables as
# attributes and builds ``scan_once`` with make_scanner(self); decode() and
# raw_decode() drive that scanner.
# NOTE(review): the embedded line numbers in this listing are not contiguous
# (e.g. 334 -> 338, 360 -> 363, 389 -> 391, 403 -> 405), so docstring
# terminators, several table rows, two truncated docstring sentences and some
# statements are not visible here -- confirm against the full file.
class JSONDecoder(object):
309
"""Simple JSON <http://json.org> decoder
311
Performs the following translations in decoding by default:
313
+---------------+-------------------+
315
+===============+===================+
317
+---------------+-------------------+
319
+---------------+-------------------+
321
+---------------+-------------------+
322
| number (int) | int, long |
323
+---------------+-------------------+
324
| number (real) | float |
325
+---------------+-------------------+
327
+---------------+-------------------+
329
+---------------+-------------------+
331
+---------------+-------------------+
333
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
334
their corresponding ``float`` values, which is outside the JSON spec.
338
def __init__(self, encoding=None, object_hook=None, parse_float=None,
339
parse_int=None, parse_constant=None, strict=True,
340
object_pairs_hook=None):
342
*encoding* determines the encoding used to interpret any
343
:class:`str` objects decoded by this instance (``'utf-8'`` by
344
default). It has no effect when decoding :class:`unicode` objects.
346
Note that currently only encodings that are a superset of ASCII work,
347
strings of other encodings should be passed in as :class:`unicode`.
349
*object_hook*, if specified, will be called with the result of every
350
JSON object decoded and its return value will be used in place of the
351
given :class:`dict`. This can be used to provide custom
352
deserializations (e.g. to support JSON-RPC class hinting).
354
*object_pairs_hook* is an optional function that will be called with
355
the result of any object literal decode with an ordered list of pairs.
356
The return value of *object_pairs_hook* will be used instead of the
357
:class:`dict`. This feature can be used to implement custom decoders
358
that rely on the order that the key and value pairs are decoded (for
359
example, :func:`collections.OrderedDict` will remember the order of
360
insertion). If *object_hook* is also defined, the *object_pairs_hook*
363
*parse_float*, if specified, will be called with the string of every
364
JSON float to be decoded. By default, this is equivalent to
365
``float(num_str)``. This can be used to use another datatype or parser
366
for JSON floats (e.g. :class:`decimal.Decimal`).
368
*parse_int*, if specified, will be called with the string of every
369
JSON int to be decoded. By default, this is equivalent to
370
``int(num_str)``. This can be used to use another datatype or parser
371
for JSON integers (e.g. :class:`float`).
373
*parse_constant*, if specified, will be called with one of the
374
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
375
can be used to raise an exception if invalid JSON numbers are
378
*strict* controls the parser's behavior when it encounters an
379
invalid control character in a string. The default setting of
380
``True`` means that unescaped control characters are parse errors, if
381
``False`` then control characters will be allowed in strings.
384
# Constructor arguments are stored unchanged on the instance.
self.encoding = encoding
385
self.object_hook = object_hook
386
self.object_pairs_hook = object_pairs_hook
387
# Falsy parse_* arguments fall back to the default on the right of ``or``.
self.parse_float = parse_float or float
388
self.parse_int = parse_int or int
389
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
391
# Module-level parsing callables exposed as attributes; presumably read by
# the scanner built below -- verify in simplejson.scanner.
self.parse_object = JSONObject
392
self.parse_array = JSONArray
393
self.parse_string = scanstring
395
# Built last, from the instance that now carries the attributes above.
self.scan_once = make_scanner(self)
397
def decode(self, s, _w=WHITESPACE.match):
398
"""Return the Python representation of ``s`` (a ``str`` or ``unicode``
399
instance containing a JSON document)
402
# Skip leading whitespace, then decode one document from that index.
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
403
# Advance past any whitespace that follows the document.
end = _w(s, end).end()
405
# NOTE(review): presumably raised only when ``end`` has not reached
# len(s); the guarding condition is not shown in this listing.
raise JSONDecodeError("Extra data", s, end, len(s))
408
def raw_decode(self, s, idx=0):
409
"""Decode a JSON document from ``s`` (a ``str`` or ``unicode``
410
beginning with a JSON document) and return a 2-tuple of the Python
411
representation and the index in ``s`` where the document ended.
413
This can be used to decode a JSON document from a string that may
414
have extraneous data at the end.
418
obj, end = self.scan_once(s, idx)
419
# StopIteration from scan_once is reported as a JSONDecodeError at ``idx``.
except StopIteration:
420
raise JSONDecodeError("No JSON object could be decoded", s, idx)