1
%% @author Bob Ippolito <bob@mochimedia.com>
2
%% @copyright 2007 Mochi Media, Inc.
4
%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
5
%% with binaries as strings, arrays as lists (without an {array, _})
6
%% wrapper and it only knows how to decode UTF-8 (and ASCII).
9
-author('bob@mochimedia.com').
10
-export([encoder/1, encode/1]).
11
-export([decoder/1, decode/1]).
14
% This is a macro to placate syntax highlighters..
16
%% Move decoder state S forward by N positions on the current line:
%% both the column and the offset fields grow by N.
-define(ADV_COL(S, N),
        S#decoder{column = S#decoder.column + N,
                  offset = S#decoder.offset + N}).
18
%% Move decoder state S forward by exactly one position on the current
%% line: the column and the offset fields are each incremented by one.
-define(INC_COL(S),
        S#decoder{column = S#decoder.column + 1,
                  offset = S#decoder.offset + 1}).
20
%% Move decoder state S onto the next line: the line and the offset
%% fields are each incremented by one.
%% NOTE(review): the column field is left untouched here; confirm whether
%% it is meant to be reset when a new line starts.
-define(INC_LINE(S),
        S#decoder{line = S#decoder.line + 1,
                  offset = S#decoder.offset + 1}).
23
-define(INC_CHAR(S, C),
27
line=1+S#decoder.line,
28
offset=1+S#decoder.offset};
30
S#decoder{column=1+S#decoder.column,
31
offset=1+S#decoder.offset}
33
%% True when C is a space, tab, carriage return or line feed.
%% Written with comparisons and orelse only, so it is usable in a guard.
-define(IS_WHITESPACE(C),
        (C =:= $\n orelse
         C =:= $\r orelse
         C =:= $\t orelse
         C =:= $\s)).
36
%% @type iolist() = [char() | binary() | iolist()]
37
%% @type iodata() = iolist() | binary()
38
%% @type json_string() = atom() | binary()
39
%% @type json_number() = integer() | float()
40
%% @type json_array() = [json_term()]
41
%% @type json_object() = {struct, [{json_string(), json_term()}]}
42
%% @type json_term() = json_string() | json_number() | json_array() |
45
%% Encoder state. handler defaults to null. When json_encode/2 meets a
%% term it cannot encode it exits with {json_encode, {bad_term, Term}} if
%% handler is null; otherwise it calls handler on the term and encodes
%% the value that call returns.
-record(encoder, {handler=null}).
47
-record(decoder, {object_hook=null,
53
%% @spec encoder([encoder_option()]) -> function()
54
%% @doc Create an encoder/1 with the given options.
56
State = parse_encoder_options(Options, #encoder{}),
57
fun (O) -> json_encode(O, State) end.
59
%% @spec encode(json_term()) -> iolist()
60
%% @doc Encode the given term as JSON to an iolist.
62
json_encode(Any, #encoder{}).
64
%% @spec decoder([decoder_option()]) -> function()
65
%% @doc Create a decoder/1 with the given options.
67
State = parse_decoder_options(Options, #decoder{}),
68
fun (O) -> json_decode(O, State) end.
70
%% @spec decode(iolist()) -> json_term()
71
%% @doc Decode the given iolist to Erlang terms.
73
json_decode(S, #decoder{}).
80
parse_encoder_options([], State) ->
82
parse_encoder_options([{handler, Handler} | Rest], State) ->
83
parse_encoder_options(Rest, State#encoder{handler=Handler}).
85
parse_decoder_options([], State) ->
87
parse_decoder_options([{object_hook, Hook} | Rest], State) ->
88
parse_decoder_options(Rest, State#decoder{object_hook=Hook}).
90
json_encode(true, _State) ->
92
json_encode(false, _State) ->
94
json_encode(null, _State) ->
96
json_encode(I, _State) when is_integer(I) andalso I >= -2147483648 andalso I =< 2147483647 ->
97
%% Anything outside of 32-bit integers should be encoded as a float
99
json_encode(I, _State) when is_integer(I) ->
100
mochinum:digits(float(I));
101
json_encode(F, _State) when is_float(F) ->
103
json_encode(S, State) when is_binary(S); is_atom(S) ->
104
json_encode_string(S, State);
105
json_encode(Array, State) when is_list(Array) ->
106
json_encode_array(Array, State);
107
json_encode({struct, Props}, State) when is_list(Props) ->
108
json_encode_proplist(Props, State);
109
json_encode(Bad, #encoder{handler=null}) ->
110
exit({json_encode, {bad_term, Bad}});
111
json_encode(Bad, State=#encoder{handler=Handler}) ->
112
json_encode(Handler(Bad), State).
114
json_encode_array([], _State) ->
116
json_encode_array(L, State) ->
118
[$,, json_encode(O, State) | Acc]
120
[$, | Acc1] = lists:foldl(F, "[", L),
121
lists:reverse([$\] | Acc1]).
123
json_encode_proplist([], _State) ->
125
json_encode_proplist(Props, State) ->
126
F = fun ({K, V}, Acc) ->
127
KS = json_encode_string(K, State),
128
VS = json_encode(V, State),
129
[$,, VS, $:, KS | Acc]
131
[$, | Acc1] = lists:foldl(F, "{", Props),
132
lists:reverse([$\} | Acc1]).
134
%% Encode an atom, binary, integer or list as a JSON string.
%% The input is first normalized to a character list (see
%% json_string_chars/1) and then handed to json_encode_string_unicode/2
%% with an accumulator that already holds the opening quote (?Q).
%% The second argument (encoder state) is not used.
%% Any other input type fails with a function_clause error.
json_encode_string(Value, _State)
  when is_atom(Value); is_binary(Value); is_integer(Value); is_list(Value) ->
    json_encode_string_unicode(json_string_chars(Value), [?Q]).

%% Normalize the supported inputs of json_encode_string/2 to a list:
%%   atom    - its name, passed through xmerl_ucs:from_utf8/1
%%   binary  - passed through xmerl_ucs:from_utf8/1
%%   integer - its decimal digits
%%   list    - returned unchanged (presumably already a list of code
%%             points; verify against callers)
json_string_chars(Atom) when is_atom(Atom) ->
    xmerl_ucs:from_utf8(atom_to_list(Atom));
json_string_chars(Bin) when is_binary(Bin) ->
    xmerl_ucs:from_utf8(Bin);
json_string_chars(Int) when is_integer(Int) ->
    integer_to_list(Int);
json_string_chars(Chars) when is_list(Chars) ->
    Chars.
143
json_encode_string_unicode([], Acc) ->
144
lists:reverse([$\" | Acc]);
145
json_encode_string_unicode([C | Cs], Acc) ->
149
%% Escaping solidus is only useful when trying to protect
150
%% against "</script>" injection attacks which are only
151
%% possible when JSON is inserted into a HTML document
152
%% in-line. mochijson2 does not protect you from this, so
153
%% if you do insert directly into HTML then you need to
154
%% uncomment the following case or escape the output of encode.
171
C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
176
exit({json_encode, {bad_char, C}})
178
json_encode_string_unicode(Cs, Acc1).
180
hexdigit(C) when C >= 0, C =< 9 ->
182
hexdigit(C) when C =< 15 ->
185
unihex(C) when C < 16#10000 ->
186
<<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
187
Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
189
unihex(C) when C =< 16#10FFFF ->
191
S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
192
S2 = 16#dc00 bor (N band 16#3ff),
193
[unihex(S1), unihex(S2)].
195
json_decode(L, S) when is_list(L) ->
196
json_decode(iolist_to_binary(L), S);
198
{Res, S1} = decode1(B, S),
199
{eof, _} = tokenize(B, S1#decoder{state=trim}),
202
decode1(B, S=#decoder{state=null}) ->
203
case tokenize(B, S#decoder{state=any}) of
208
{start_object, S1} ->
212
make_object(V, #decoder{object_hook=null}) ->
214
make_object(V, #decoder{object_hook=Hook}) ->
217
%% Entry point for decoding a JSON object from binary Bin.
%% Switches the decoder into the key state and starts decode_object/3
%% with an empty list of accumulated {Key, Value} pairs. Returns the
%% {Value, DecoderState} tuple produced by decode_object/3.
decode_object(Bin, Decoder) ->
    KeyState = Decoder#decoder{state = key},
    decode_object(Bin, KeyState, []).
220
decode_object(B, S=#decoder{state=key}, Acc) ->
221
case tokenize(B, S) of
223
V = make_object({struct, lists:reverse(Acc)}, S1),
224
{V, S1#decoder{state=null}};
226
{colon, S2} = tokenize(B, S1),
227
{V, S3} = decode1(B, S2#decoder{state=null}),
228
decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc])
230
decode_object(B, S=#decoder{state=comma}, Acc) ->
231
case tokenize(B, S) of
233
V = make_object({struct, lists:reverse(Acc)}, S1),
234
{V, S1#decoder{state=null}};
236
decode_object(B, S1#decoder{state=key}, Acc)
239
%% Entry point for decoding a JSON array from binary Bin.
%% Switches the decoder into the any state and starts decode_array/3
%% with an empty list of accumulated elements. Returns the
%% {Elements, DecoderState} tuple produced by decode_array/3.
decode_array(Bin, Decoder) ->
    AnyState = Decoder#decoder{state = any},
    decode_array(Bin, AnyState, []).
242
decode_array(B, S=#decoder{state=any}, Acc) ->
243
case tokenize(B, S) of
245
{lists:reverse(Acc), S1#decoder{state=null}};
247
{Array, S2} = decode_array(B, S1),
248
decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
249
{start_object, S1} ->
250
{Array, S2} = decode_object(B, S1),
251
decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
252
{{const, Const}, S1} ->
253
decode_array(B, S1#decoder{state=comma}, [Const | Acc])
255
decode_array(B, S=#decoder{state=comma}, Acc) ->
256
case tokenize(B, S) of
258
{lists:reverse(Acc), S1#decoder{state=null}};
260
decode_array(B, S1#decoder{state=any}, Acc)
263
%% Entry point for tokenizing a JSON string body from binary Bin,
%% starting at the decoder's current offset. Delegates to
%% tokenize_string/3 with an empty character accumulator and returns
%% its result unchanged.
tokenize_string(Bin, Decoder) ->
    EmptyAcc = [],
    tokenize_string(Bin, Decoder, EmptyAcc).
266
tokenize_string(B, S=#decoder{offset=O}, Acc) ->
268
<<_:O/binary, ?Q, _/binary>> ->
269
{{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
270
<<_:O/binary, "\\\"", _/binary>> ->
271
tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]);
272
<<_:O/binary, "\\\\", _/binary>> ->
273
tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]);
274
<<_:O/binary, "\\/", _/binary>> ->
275
tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]);
276
<<_:O/binary, "\\b", _/binary>> ->
277
tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]);
278
<<_:O/binary, "\\f", _/binary>> ->
279
tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]);
280
<<_:O/binary, "\\n", _/binary>> ->
281
tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]);
282
<<_:O/binary, "\\r", _/binary>> ->
283
tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]);
284
<<_:O/binary, "\\t", _/binary>> ->
285
tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]);
286
<<_:O/binary, "\\u", C3, C2, C1, C0, _/binary>> ->
287
%% coalesce UTF-16 surrogate pair?
288
C = erlang:list_to_integer([C3, C2, C1, C0], 16),
289
Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc),
290
tokenize_string(B, ?ADV_COL(S, 6), Acc1);
291
<<_:O/binary, C, _/binary>> ->
292
tokenize_string(B, ?INC_CHAR(S, C), [C | Acc])
295
tokenize_number(B, S) ->
296
case tokenize_number(B, sign, S, []) of
298
{{const, list_to_integer(Int)}, S1};
299
{{float, Float}, S1} ->
300
{{const, list_to_float(Float)}, S1}
303
tokenize_number(B, sign, S=#decoder{offset=O}, []) ->
305
<<_:O/binary, $-, _/binary>> ->
306
tokenize_number(B, int, ?INC_COL(S), [$-]);
308
tokenize_number(B, int, S, [])
310
tokenize_number(B, int, S=#decoder{offset=O}, Acc) ->
312
<<_:O/binary, $0, _/binary>> ->
313
tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]);
314
<<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 ->
315
tokenize_number(B, int1, ?INC_COL(S), [C | Acc])
317
tokenize_number(B, int1, S=#decoder{offset=O}, Acc) ->
319
<<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
320
tokenize_number(B, int1, ?INC_COL(S), [C | Acc]);
322
tokenize_number(B, frac, S, Acc)
324
tokenize_number(B, frac, S=#decoder{offset=O}, Acc) ->
326
<<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 ->
327
tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
328
<<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
329
tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
331
{{int, lists:reverse(Acc)}, S}
333
tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) ->
335
<<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
336
tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]);
337
<<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
338
tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]);
340
{{float, lists:reverse(Acc)}, S}
342
tokenize_number(B, esign, S=#decoder{offset=O}, Acc) ->
344
<<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ ->
345
tokenize_number(B, eint, ?INC_COL(S), [C | Acc]);
347
tokenize_number(B, eint, S, Acc)
349
tokenize_number(B, eint, S=#decoder{offset=O}, Acc) ->
351
<<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
352
tokenize_number(B, eint1, ?INC_COL(S), [C | Acc])
354
tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) ->
356
<<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
357
tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]);
359
{{float, lists:reverse(Acc)}, S}
362
tokenize(B, S=#decoder{offset=O}) ->
364
<<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
365
tokenize(B, ?INC_CHAR(S, C));
366
<<_:O/binary, "{", _/binary>> ->
367
{start_object, ?INC_COL(S)};
368
<<_:O/binary, "}", _/binary>> ->
369
{end_object, ?INC_COL(S)};
370
<<_:O/binary, "[", _/binary>> ->
371
{start_array, ?INC_COL(S)};
372
<<_:O/binary, "]", _/binary>> ->
373
{end_array, ?INC_COL(S)};
374
<<_:O/binary, ",", _/binary>> ->
375
{comma, ?INC_COL(S)};
376
<<_:O/binary, ":", _/binary>> ->
377
{colon, ?INC_COL(S)};
378
<<_:O/binary, "null", _/binary>> ->
379
{{const, null}, ?ADV_COL(S, 4)};
380
<<_:O/binary, "true", _/binary>> ->
381
{{const, true}, ?ADV_COL(S, 4)};
382
<<_:O/binary, "false", _/binary>> ->
383
{{const, false}, ?ADV_COL(S, 5)};
384
<<_:O/binary, "\"", _/binary>> ->
385
tokenize_string(B, ?INC_COL(S));
386
<<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9)
388
tokenize_number(B, S);
390
trim = S#decoder.state,
394
%% testing constructs borrowed from the Yaws JSON implementation.
396
%% Create an object from a list of Key/Value pairs.
401
is_obj({struct, Props}) ->
402
F = fun ({K, _}) when is_binary(K) ->
409
obj_from_list(Props) ->
410
Obj = {struct, Props},
413
false -> exit({json_bad_object, Obj})
416
%% Test for equivalence of Erlang terms.
417
%% Due to arbitrary order of construction, equivalent objects might
418
%% compare unequal as erlang terms, so we need to carefully recurse
419
%% through aggregates (lists and objects).
421
equiv({struct, Props1}, {struct, Props2}) ->
422
equiv_object(Props1, Props2);
423
equiv(L1, L2) when is_list(L1), is_list(L2) ->
425
equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
426
equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2;
427
equiv(true, true) -> true;
428
equiv(false, false) -> true;
429
equiv(null, null) -> true.
431
%% Object representation and traversal order is unknown.
432
%% Use the sledgehammer and sort property lists.
434
equiv_object(Props1, Props2) ->
435
L1 = lists:keysort(1, Props1),
436
L2 = lists:keysort(1, Props2),
437
Pairs = lists:zip(L1, L2),
438
true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
439
equiv(K1, K2) and equiv(V1, V2)
442
%% Recursively compare list elements for equivalence.
444
equiv_list([], []) ->
446
equiv_list([V1 | L1], [V2 | L2]) ->
447
case equiv(V1, V2) of
455
[1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
456
test_one(e2j_test_vec(utf8), 1).
459
%% io:format("~p tests passed~n", [N-1]),
461
test_one([{E, J} | Rest], N) ->
462
%% io:format("[~p] ~p ~p~n", [N, E, J]),
463
true = equiv(E, decode(J)),
464
true = equiv(E, decode(encode(E))),
467
e2j_test_vec(utf8) ->
470
{3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
472
{-3.1416, "-3.14160"},
473
{12.0e10, "1.20000e+11"},
474
{1.234E+10, "1.23400e+10"},
475
{-1.234E-10, "-1.23400e-10"},
477
{123.456, "1.23456E+2"},
479
{<<"foo">>, "\"foo\""},
480
{<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
482
{<<"\n\n\n">>, "\"\\n\\n\\n\""},
483
{<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
485
{obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
486
{obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
487
"{\"foo\":\"bar\",\"baz\":123}"},
490
{[1, <<"foo">>], "[1,\"foo\"]"},
492
%% json array in a json object
493
{obj_from_list([{<<"foo">>, [123]}]),
496
%% json object in a json object
497
{obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
498
"{\"foo\":{\"bar\":true}}"},
500
%% fold evaluation order
501
{obj_from_list([{<<"foo">>, []},
502
{<<"bar">>, obj_from_list([{<<"baz">>, true}])},
503
{<<"alice">>, <<"bob">>}]),
504
"{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
506
%% json object in a json array
507
{[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
508
"[-123,\"foo\",{\"bar\":[]},null]"}