1
"""Implementation of JSONEncoder
6
from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
8
c_encode_basestring_ascii = None
10
from simplejson._speedups import make_encoder as c_make_encoder
14
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
15
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
16
HAS_UTF8 = re.compile(r'[\x80-\xff]')
27
#ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
28
ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
30
# Assume this produces an infinity on all machines (probably not guaranteed)
31
INFINITY = float('1e66666')
34
def encode_basestring(s):
35
"""Return a JSON representation of a Python string
39
return ESCAPE_DCT[match.group(0)]
40
return '"' + ESCAPE.sub(replace, s) + '"'
43
def py_encode_basestring_ascii(s):
44
"""Return an ASCII-only JSON representation of a Python string
47
if isinstance(s, str) and HAS_UTF8.search(s) is not None:
56
#return '\\u{0:04x}'.format(n)
57
return '\\u%04x' % (n,)
61
s1 = 0xd800 | ((n >> 10) & 0x3ff)
62
s2 = 0xdc00 | (n & 0x3ff)
63
#return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
64
return '\\u%04x\\u%04x' % (s1, s2)
65
return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
68
encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
70
class JSONEncoder(object):
71
"""Extensible JSON <http://json.org> encoder for Python data structures.
73
Supports the following objects and types by default:
75
+-------------------+---------------+
77
+===================+===============+
79
+-------------------+---------------+
80
| list, tuple | array |
81
+-------------------+---------------+
82
| str, unicode | string |
83
+-------------------+---------------+
84
| int, long, float | number |
85
+-------------------+---------------+
87
+-------------------+---------------+
89
+-------------------+---------------+
91
+-------------------+---------------+
93
To extend this to recognize other objects, subclass and implement a
94
``.default()`` method that returns a serializable
95
object for ``o`` if possible, otherwise it should call the superclass
96
implementation (to raise ``TypeError``).
101
def __init__(self, skipkeys=False, ensure_ascii=True,
102
check_circular=True, allow_nan=True, sort_keys=False,
103
indent=None, separators=None, encoding='utf-8', default=None):
104
"""Constructor for JSONEncoder, with sensible defaults.
106
If skipkeys is false, then it is a TypeError to attempt
107
encoding of keys that are not str, int, long, float or None. If
108
skipkeys is true, such items are simply skipped.
110
If ensure_ascii is true, the output is guaranteed to be str
111
objects with all incoming unicode characters escaped. If
112
ensure_ascii is false, the output will be a unicode object.
114
If check_circular is true, then lists, dicts, and custom encoded
115
objects will be checked for circular references during encoding to
116
prevent an infinite recursion (which would cause an OverflowError).
117
Otherwise, no such check takes place.
119
If allow_nan is true, then NaN, Infinity, and -Infinity will be
120
encoded as such. This behavior is not JSON specification compliant,
121
but is consistent with most JavaScript based encoders and decoders.
122
Otherwise, it will be a ValueError to encode such floats.
124
If sort_keys is true, then the output of dictionaries will be
125
sorted by key; this is useful for regression tests to ensure
126
that JSON serializations can be compared on a day-to-day basis.
128
If indent is a non-negative integer, then JSON array
129
elements and object members will be pretty-printed with that
130
indent level. An indent level of 0 will only insert newlines.
131
None is the most compact representation.
133
If specified, separators should be a (item_separator, key_separator)
134
tuple. The default is (', ', ': '). To get the most compact JSON
135
representation you should specify (',', ':') to eliminate whitespace.
137
If specified, default is a function that gets called for objects
138
that can't otherwise be serialized. It should return a JSON encodable
139
version of the object or raise a ``TypeError``.
141
If encoding is not None, then all input strings will be
142
transformed into unicode using that encoding prior to JSON-encoding.
143
The default is UTF-8.
147
self.skipkeys = skipkeys
148
self.ensure_ascii = ensure_ascii
149
self.check_circular = check_circular
150
self.allow_nan = allow_nan
151
self.sort_keys = sort_keys
153
if separators is not None:
154
self.item_separator, self.key_separator = separators
155
if default is not None:
156
self.default = default
157
self.encoding = encoding
159
def default(self, o):
160
"""Implement this method in a subclass such that it returns
161
a serializable object for ``o``, or calls the base implementation
162
(to raise a ``TypeError``).
164
For example, to support arbitrary iterators, you could
165
implement default like this::
167
def default(self, o):
173
return list(iterable)
174
return JSONEncoder.default(self, o)
177
raise TypeError(repr(o) + " is not JSON serializable")
180
"""Return a JSON string representation of a Python data structure.
182
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
183
'{"foo": ["bar", "baz"]}'
186
# This is for extremely simple cases and benchmarks.
187
if isinstance(o, basestring):
188
if isinstance(o, str):
189
_encoding = self.encoding
190
if (_encoding is not None
191
and not (_encoding == 'utf-8')):
192
o = o.decode(_encoding)
193
if self.ensure_ascii:
194
return encode_basestring_ascii(o)
196
return encode_basestring(o)
197
# This doesn't pass the iterator directly to ''.join() because the
198
# exceptions aren't as detailed. The list call should be roughly
199
# equivalent to the PySequence_Fast that ''.join() would do.
200
chunks = self.iterencode(o, _one_shot=True)
201
if not isinstance(chunks, (list, tuple)):
202
chunks = list(chunks)
203
return ''.join(chunks)
205
def iterencode(self, o, _one_shot=False):
206
"""Encode the given object and yield each string
207
representation as available.
211
for chunk in JSONEncoder().iterencode(bigobject):
212
mysocket.write(chunk)
215
if self.check_circular:
219
if self.ensure_ascii:
220
_encoder = encode_basestring_ascii
222
_encoder = encode_basestring
223
if self.encoding != 'utf-8':
224
def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
225
if isinstance(o, str):
226
o = o.decode(_encoding)
227
return _orig_encoder(o)
229
def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
230
# Check for specials. Note that this type of test is processor- and/or
231
# platform-specific, so do tests which don't depend on the internals.
244
"Out of range float values are not JSON compliant: " +
250
if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
251
_iterencode = c_make_encoder(
252
markers, self.default, _encoder, self.indent,
253
self.key_separator, self.item_separator, self.sort_keys,
254
self.skipkeys, self.allow_nan)
256
_iterencode = _make_iterencode(
257
markers, self.default, _encoder, self.indent, floatstr,
258
self.key_separator, self.item_separator, self.sort_keys,
259
self.skipkeys, _one_shot)
260
return _iterencode(o, 0)
262
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
263
## HACK: hand-optimized bytecode; turn globals into locals
266
ValueError=ValueError,
267
basestring=basestring,
272
isinstance=isinstance,
279
def _iterencode_list(lst, _current_indent_level):
283
if markers is not None:
285
if markerid in markers:
286
raise ValueError("Circular reference detected")
287
markers[markerid] = lst
289
if _indent is not None:
290
_current_indent_level += 1
291
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
292
separator = _item_separator + newline_indent
293
buf += newline_indent
295
newline_indent = None
296
separator = _item_separator
303
if isinstance(value, basestring):
304
yield buf + _encoder(value)
311
elif isinstance(value, (int, long)):
312
yield buf + str(value)
313
elif isinstance(value, float):
314
yield buf + _floatstr(value)
317
if isinstance(value, (list, tuple)):
318
chunks = _iterencode_list(value, _current_indent_level)
319
elif isinstance(value, dict):
320
chunks = _iterencode_dict(value, _current_indent_level)
322
chunks = _iterencode(value, _current_indent_level)
325
if newline_indent is not None:
326
_current_indent_level -= 1
327
yield '\n' + (' ' * (_indent * _current_indent_level))
329
if markers is not None:
330
del markers[markerid]
332
def _iterencode_dict(dct, _current_indent_level):
336
if markers is not None:
338
if markerid in markers:
339
raise ValueError("Circular reference detected")
340
markers[markerid] = dct
342
if _indent is not None:
343
_current_indent_level += 1
344
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
345
item_separator = _item_separator + newline_indent
348
newline_indent = None
349
item_separator = _item_separator
353
items.sort(key=lambda kv: kv[0])
355
items = dct.iteritems()
356
for key, value in items:
357
if isinstance(key, basestring):
359
# JavaScript is weakly typed for these, so it makes sense to
360
# also allow them. Many encoders seem to do something like this.
361
elif isinstance(key, float):
369
elif isinstance(key, (int, long)):
374
raise TypeError("key " + repr(key) + " is not a string")
381
if isinstance(value, basestring):
382
yield _encoder(value)
389
elif isinstance(value, (int, long)):
391
elif isinstance(value, float):
392
yield _floatstr(value)
394
if isinstance(value, (list, tuple)):
395
chunks = _iterencode_list(value, _current_indent_level)
396
elif isinstance(value, dict):
397
chunks = _iterencode_dict(value, _current_indent_level)
399
chunks = _iterencode(value, _current_indent_level)
402
if newline_indent is not None:
403
_current_indent_level -= 1
404
yield '\n' + (' ' * (_indent * _current_indent_level))
406
if markers is not None:
407
del markers[markerid]
409
def _iterencode(o, _current_indent_level):
410
if isinstance(o, basestring):
418
elif isinstance(o, (int, long)):
420
elif isinstance(o, float):
422
elif isinstance(o, (list, tuple)):
423
for chunk in _iterencode_list(o, _current_indent_level):
425
elif isinstance(o, dict):
426
for chunk in _iterencode_dict(o, _current_indent_level):
429
if markers is not None:
431
if markerid in markers:
432
raise ValueError("Circular reference detected")
433
markers[markerid] = o
435
for chunk in _iterencode(o, _current_indent_level):
437
if markers is not None:
438
del markers[markerid]