1
{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}
2
{-# OPTIONS_GHC -fno-warn-unused-binds #-}
6
import Data.ByteString.Builder
7
(Builder, byteString, toLazyByteString, charUtf8, word8)
9
#if !MIN_VERSION_base(4,8,0)
10
import Control.Applicative ((*>), (<$>), (<*), pure)
11
import Data.Monoid (mappend, mempty)
14
import Control.Applicative (liftA2)
15
import Control.DeepSeq (NFData(..))
16
import Control.Monad (forM)
17
import Data.Bits ((.|.), shiftL)
18
import Data.ByteString (ByteString)
19
import Data.Char (chr)
20
import Data.List (sort)
21
import Data.Scientific (Scientific)
22
import Data.Text (Text)
23
import Data.Text.Encoding (decodeUtf8')
24
import Data.Vector as Vector (Vector, foldl', fromList)
25
import Data.Word (Word8)
26
import System.Directory (getDirectoryContents)
27
import System.FilePath ((</>), dropExtension)
28
import qualified Data.Attoparsec.Zepto as Z
29
import Data.Binary.Get (Get)
30
import qualified Data.Binary.Parser as BP
31
import qualified Data.ByteString as B
32
import qualified Data.ByteString.Lazy as L
33
import qualified Data.ByteString.Unsafe as B
34
import qualified Data.HashMap.Strict as H
35
import System.Directory (doesDirectoryExist)
36
import Aeson (Value(..))
39
#define CLOSE_CURLY 125
40
#define CLOSE_SQUARE 93
43
#define DOUBLE_QUOTE 34
44
#define OPEN_CURLY 123
45
#define OPEN_SQUARE 91
55
pathTo :: String -> IO FilePath
57
exists <- doesDirectoryExist "bench"
62
data Result a = Error String
67
-- | A JSON \"object\" (key\/value map).
68
type Object = H.HashMap Text Value
70
-- | A JSON \"array\" (sequence).
71
type Array = Vector Value
73
-- | Parse a top-level JSON value. This must be either an object or
74
-- an array, per RFC 4627.
76
-- The conversion of a parsed value to a Haskell value is deferred
77
-- until the Haskell value is needed. This may improve performance if
78
-- only a subset of the results of conversions are needed, but at a
79
-- cost in thunk allocation.
81
json = json_ object_ array_
83
-- | Parse a top-level JSON value. This must be either an object or
84
-- an array, per RFC 4627.
86
-- This is a strict version of 'json' which avoids building up thunks
87
-- during parsing; it performs all conversions immediately. Prefer
88
-- this version if most of the JSON data needs to be accessed.
90
json' = json_ object_' array_'
92
json_ :: Get Value -> Get Value -> Get Value
94
w <- BP.skipSpaces *> BP.satisfy (\w -> w == OPEN_CURLY || w == OPEN_SQUARE)
101
object_ = {-# SCC "object_" #-} Object <$> objectValues jstring value
103
object_' :: Get Value
104
object_' = {-# SCC "object_'" #-} do
105
!vals <- objectValues jstring' value'
112
objectValues :: Get Text -> Get Value -> Get (H.HashMap Text Value)
113
objectValues str val = do
115
let pair = liftA2 (,) (str <* BP.skipSpaces) (BP.word8 COLON *> BP.skipSpaces *> val)
116
H.fromList <$> commaSeparated pair CLOSE_CURLY
117
{-# INLINE objectValues #-}
120
array_ = {-# SCC "array_" #-} Array <$> arrayValues value
123
array_' = {-# SCC "array_'" #-} do
124
!vals <- arrayValues value'
127
commaSeparated :: Get a -> Word8 -> Get [a]
128
commaSeparated item endByte = do
131
then BP.skipN 1 >> return []
135
v <- item <* BP.skipSpaces
136
ch <- BP.satisfy $ \w -> w == COMMA || w == endByte
138
then BP.skipSpaces >> (v:) <$> loop
140
{-# INLINE commaSeparated #-}
142
arrayValues :: Get Value -> Get (Vector Value)
145
Vector.fromList <$> commaSeparated val CLOSE_SQUARE
146
{-# INLINE arrayValues #-}
148
-- | Parse any JSON value. You should usually 'json' in preference to
149
-- this function, as this function relaxes the object-or-array
150
-- requirement of RFC 4627.
152
-- In particular, be careful in using this function if you think your
153
-- code might interoperate with Javascript. A naïve Javascript
154
-- library that parses JSON data using @eval@ is vulnerable to attack
155
-- unless the encoded data represents an object or an array. JSON
156
-- implementations in other languages conform to that same restriction
157
-- to preserve interoperability and security.
162
DOUBLE_QUOTE -> BP.skipN 1 *> (String <$> jstring_)
163
OPEN_CURLY -> BP.skipN 1 *> object_
164
OPEN_SQUARE -> BP.skipN 1 *> array_
165
C_f -> BP.string "false" *> pure (Bool False)
166
C_t -> BP.string "true" *> pure (Bool True)
167
C_n -> BP.string "null" *> pure Null
168
_ | w >= 48 && w <= 57 || w == 45
169
-> Number <$> BP.scientific
170
| otherwise -> fail "not a valid json value"
172
-- | Strict version of 'value'. See also 'json''.
178
!s <- BP.skipN 1 *> jstring_
180
OPEN_CURLY -> BP.skipN 1 *> object_'
181
OPEN_SQUARE -> BP.skipN 1 *> array_'
182
C_f -> BP.string "false" *> pure (Bool False)
183
C_t -> BP.string "true" *> pure (Bool True)
184
C_n -> BP.string "null" *> pure Null
185
_ | w >= 48 && w <= 57 || w == 45
189
| otherwise -> fail "not a valid json value"
191
-- | Parse a quoted JSON string.
193
jstring = BP.word8 DOUBLE_QUOTE *> jstring_
195
-- | Parse a string without a leading quote.
197
jstring_ = {-# SCC "jstring_" #-} do
198
s <- BP.scan False $ \s c -> if s then Just False
199
else if c == DOUBLE_QUOTE
201
else Just (c == BACKSLASH)
202
BP.word8 DOUBLE_QUOTE
203
s1 <- if BACKSLASH `B.elem` s
204
then case Z.parse unescape s of
209
case decodeUtf8' s1 of
211
Left err -> fail $ show err
213
{-# INLINE jstring_ #-}
215
unescape :: Z.Parser ByteString
216
unescape = toByteString <$> go mempty where
218
h <- Z.takeWhile (/=BACKSLASH)
221
let !slash = B.unsafeHead start
222
!t = B.unsafeIndex start 1
223
escape = case B.findIndex (==t) "\"\\/ntbrfu" of
226
if slash /= BACKSLASH || escape == 255
227
then fail "invalid JSON escape sequence"
229
let cont m = go (acc `mappend` byteString h `mappend` m)
232
then cont (word8 (B.unsafeIndex mapping escape))
235
if a < 0xd800 || a > 0xdfff
236
then cont (charUtf8 (chr a))
238
b <- Z.string "\\u" *> hexQuad
239
if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff
240
then let !c = ((a - 0xd800) `shiftL` 10) +
241
(b - 0xdc00) + 0x10000
242
in cont (charUtf8 (chr c))
243
else fail "invalid UTF-16 surrogates"
246
then return (acc `mappend` byteString h)
248
mapping = "\"\\/\n\t\b\r\f"
250
hexQuad :: Z.Parser Int
253
let hex n | w >= C_0 && w <= C_9 = w - C_0
254
| w >= C_a && w <= C_f = w - 87
255
| w >= C_A && w <= C_F = w - 55
257
where w = fromIntegral $ B.unsafeIndex s n
258
a = hex 0; b = hex 1; c = hex 2; d = hex 3
259
if (a .|. b .|. c .|. d) /= 255
260
then return $! d .|. (c `shiftL` 4) .|. (b `shiftL` 8) .|. (a `shiftL` 12)
261
else fail "invalid hex escape"
265
-- The 'json' and 'value' parsers decouple identification from
266
-- conversion. Identification occurs immediately (so that an invalid
267
-- JSON document can be rejected as early as possible), but conversion
268
-- to a Haskell value is deferred until that value is needed.
270
-- This decoupling can be time-efficient if only a smallish subset of
271
-- elements in a JSON value need to be inspected, since the cost of
272
-- conversion is zero for uninspected elements. The trade off is an
273
-- increase in memory usage, due to allocation of thunks for values
274
-- that have not yet been converted.
278
-- The 'json'' and 'value'' parsers combine identification with
279
-- conversion. They consume more CPU cycles up front, but have a
280
-- smaller memory footprint.
282
-- | Parse a top-level JSON value followed by optional whitespace and
283
-- end-of-input. See also: 'json'.
285
jsonEOF = json <* BP.skipSpaces
287
-- | Parse a top-level JSON value followed by optional whitespace and
288
-- end-of-input. See also: 'json''.
289
jsonEOF' :: Get Value
290
jsonEOF' = json' <* BP.skipSpaces
292
toByteString :: Builder -> ByteString
293
toByteString = L.toStrict . toLazyByteString
294
{-# INLINE toByteString #-}