~ubuntu-branches/debian/experimental/haskell-binary-parsers/experimental

« back to all changes in this revision

Viewing changes to bench/Aeson.hs

Committer: Package Import Robot
Author(s): Clint Adams
Date: 2016-10-09 02:48:57 UTC
Revision ID: package-import@ubuntu.com-20161009024857-371qoot9dk9der2z

Tags: upstream-0.2.3.0

Import upstream version 0.2.3.0

files added:

ChangeLog.md

Data

Data/Binary

Data/Binary/Parser

Data/Binary/Parser.hs

Data/Binary/Parser/Char8.hs

Data/Binary/Parser/Numeric.hs

Data/Binary/Parser/Word8.hs

LICENSE

README.md

Setup.hs

bench

bench/Aeson.hs

bench/AesonBP.hs

bench/Bench.hs

bench/Common.hs

bench/HttpReq.hs

bench/Network

bench/Network/Wai

bench/Network/Wai/Handler

bench/Network/Wai/Handler/Warp

bench/Network/Wai/Handler/Warp/ReadInt.hs

bench/Network/Wai/Handler/Warp/RequestHeader.hs

bench/http-request.txt

bench/json-data

bench/json-data/buffer-builder.json

bench/json-data/dates-fract.json

bench/json-data/dates.json

bench/json-data/example.json

bench/json-data/geometry.json

bench/json-data/integers.json

bench/json-data/jp10.json

bench/json-data/jp100.json

bench/json-data/jp50.json

bench/json-data/numbers.json

bench/json-data/twitter1.json

bench/json-data/twitter10.json

bench/json-data/twitter100.json

bench/json-data/twitter20.json

bench/json-data/twitter50.json

binary-parsers.cabal

tests

tests/Aeson.hs

tests/AesonBP.hs

tests/JSON.hs

tests/Main.hs

tests/QC

tests/QC/ByteString.hs

tests/QC/Combinator.hs

tests/QC/Common.hs

tests/json-data

tests/json-data/buffer-builder.json

tests/json-data/dates-fract.json

tests/json-data/dates.json

tests/json-data/example.json

tests/json-data/geometry.json

tests/json-data/integers.json

tests/json-data/jp10.json

tests/json-data/jp100.json

tests/json-data/jp50.json

tests/json-data/numbers.json

tests/json-data/twitter1.json

tests/json-data/twitter10.json

tests/json-data/twitter100.json

tests/json-data/twitter20.json

tests/json-data/twitter50.json

Show diffs side-by-side

added added

removed removed

bench/Aeson.hs

{-# LANGUAGE BangPatterns, CPP, OverloadedStrings #-}

{-# OPTIONS_GHC -fno-warn-unused-binds #-}

module Aeson

(

aeson

, aesonLazy

, value'

) where

import Data.ByteString.Builder

(Builder, byteString, toLazyByteString, charUtf8, word8)

#if !MIN_VERSION_base(4,8,0)

import Control.Applicative ((*>), (<$>), (<*), pure)

import Data.Monoid (mappend, mempty)

#endif

import Control.Applicative (liftA2)

import Control.DeepSeq (NFData(..))

import Control.Monad (forM)

import Data.Attoparsec.ByteString.Char8 (Parser, char, endOfInput, scientific,

skipSpace, string)

import Data.Bits ((.|.), shiftL)

import Data.ByteString (ByteString)

import Data.Char (chr)

import Data.List (sort)

import Data.Scientific (Scientific)

import Data.Text (Text)

import Data.Text.Encoding (decodeUtf8')

import Data.Vector as Vector (Vector, foldl', fromList)

import Data.Word (Word8)

import System.Directory (getDirectoryContents, doesDirectoryExist)

import System.FilePath ((</>), dropExtension)

import qualified Data.Attoparsec.ByteString as A

import qualified Data.Attoparsec.Lazy as L

import qualified Data.Attoparsec.Zepto as Z

import qualified Data.ByteString as B

import qualified Data.ByteString.Lazy as L

import qualified Data.ByteString.Unsafe as B

import qualified Data.HashMap.Strict as H

import Criterion.Main

import Common (pathTo)

#define BACKSLASH 92

#define CLOSE_CURLY 125

#define CLOSE_SQUARE 93

#define COMMA 44

#define DOUBLE_QUOTE 34

#define OPEN_CURLY 123

#define OPEN_SQUARE 91

#define C_0 48

#define C_9 57

#define C_A 65

#define C_F 70

#define C_a 97

#define C_f 102

#define C_n 110

#define C_t 116

data Result a = Error String

| Success a

deriving (Eq, Show)

-- | A JSON \"object\" (key\/value map).

type Object = H.HashMap Text Value

-- | A JSON \"array\" (sequence).

type Array = Vector Value

-- | A JSON value represented as a Haskell value.

data Value = Object !Object

| Array !Array

| String !Text

| Number !Scientific

| Bool !Bool

| Null

deriving (Eq, Show)

instance NFData Value where

rnf (Object o) = rnf o

rnf (Array a) = Vector.foldl' (\x y -> rnf y `seq` x) () a

rnf (String s) = rnf s

rnf (Number n) = rnf n

rnf (Bool b) = rnf b

rnf Null = ()

-- | Parse a top-level JSON value. This must be either an object or

-- an array, per RFC 4627.

-- The conversion of a parsed value to a Haskell value is deferred

-- until the Haskell value is needed. This may improve performance if

-- only a subset of the results of conversions are needed, but at a

-- cost in thunk allocation.

json :: Parser Value

json = json_ object_ array_

-- | Parse a top-level JSON value. This must be either an object or

100

-- an array, per RFC 4627.

101

102

-- This is a strict version of 'json' which avoids building up thunks

103

-- during parsing; it performs all conversions immediately. Prefer

104

-- this version if most of the JSON data needs to be accessed.

105

json' :: Parser Value

106

json' = json_ object_' array_'

107

108

json_ :: Parser Value -> Parser Value -> Parser Value

109

json_ obj ary = do

110

w <- skipSpace *> A.satisfy (\w -> w == OPEN_CURLY || w == OPEN_SQUARE)

111

if w == OPEN_CURLY

112

then obj

113

else ary

114

{-# INLINE json_ #-}

115

116

object_ :: Parser Value

117

object_ = {-# SCC "object_" #-} Object <$> objectValues jstring value

118

119

object_' :: Parser Value

120

object_' = {-# SCC "object_'" #-} do

121

!vals <- objectValues jstring' value'

122

return (Object vals)

123

where

124

jstring' = do

125

!s <- jstring

126

return s

127

128

objectValues :: Parser Text -> Parser Value -> Parser (H.HashMap Text Value)

129

objectValues str val = do

130

skipSpace

131

let pair = liftA2 (,) (str <* skipSpace) (char ':' *> skipSpace *> val)

132

H.fromList <$> commaSeparated pair CLOSE_CURLY

133

{-# INLINE objectValues #-}

134

135

array_ :: Parser Value

136

array_ = {-# SCC "array_" #-} Array <$> arrayValues value

137

138

array_' :: Parser Value

139

array_' = {-# SCC "array_'" #-} do

140

!vals <- arrayValues value'

141

return (Array vals)

142

143

commaSeparated :: Parser a -> Word8 -> Parser [a]

144

commaSeparated item endByte = do

145

w <- A.peekWord8'

146

if w == endByte

147

then A.anyWord8 >> return []

148

else loop

149

where

150

loop = do

151

v <- item <* skipSpace

152

ch <- A.satisfy $ \w -> w == COMMA || w == endByte

153

if ch == COMMA

154

then skipSpace >> (v:) <$> loop

155

else return [v]

156

{-# INLINE commaSeparated #-}

157

158

arrayValues :: Parser Value -> Parser (Vector Value)

159

arrayValues val = do

160

skipSpace

161

Vector.fromList <$> commaSeparated val CLOSE_SQUARE

162

{-# INLINE arrayValues #-}

163

164

-- | Parse any JSON value. You should usually 'json' in preference to

165

-- this function, as this function relaxes the object-or-array

166

-- requirement of RFC 4627.

167

168

-- In particular, be careful in using this function if you think your

169

-- code might interoperate with Javascript. A naïve Javascript

170

-- library that parses JSON data using @eval@ is vulnerable to attack

171

-- unless the encoded data represents an object or an array. JSON

172

-- implementations in other languages conform to that same restriction

173

-- to preserve interoperability and security.

174

value :: Parser Value

175

value = do

176

w <- A.peekWord8'

177

case w of

178

DOUBLE_QUOTE -> A.anyWord8 *> (String <$> jstring_)

179

OPEN_CURLY -> A.anyWord8 *> object_

180

OPEN_SQUARE -> A.anyWord8 *> array_

181

C_f -> string "false" *> pure (Bool False)

182

C_t -> string "true" *> pure (Bool True)

183

C_n -> string "null" *> pure Null

184

_ | w >= 48 && w <= 57 || w == 45

185

-> Number <$> scientific

186

| otherwise -> fail "not a valid json value"

187

188

-- | Strict version of 'value'. See also 'json''.

189

value' :: Parser Value

190

value' = do

191

w <- A.peekWord8'

192

case w of

193

DOUBLE_QUOTE -> do

194

!s <- A.anyWord8 *> jstring_

195

return (String s)

196

OPEN_CURLY -> A.anyWord8 *> object_'

197

OPEN_SQUARE -> A.anyWord8 *> array_'

198

C_f -> string "false" *> pure (Bool False)

199

C_t -> string "true" *> pure (Bool True)

200

C_n -> string "null" *> pure Null

201

_ | w >= 48 && w <= 57 || w == 45

202

-> do

203

!n <- scientific

204

return (Number n)

205

| otherwise -> fail "not a valid json value"

206

207

-- | Parse a quoted JSON string.

208

jstring :: Parser Text

209

jstring = A.word8 DOUBLE_QUOTE *> jstring_

210

211

-- | Parse a string without a leading quote.

212

jstring_ :: Parser Text

213

jstring_ = {-# SCC "jstring_" #-} do

214

s <- A.scan False $ \s c -> if s then Just False

215

else if c == DOUBLE_QUOTE

216

then Nothing

217

else Just (c == BACKSLASH)

218

_ <- A.word8 DOUBLE_QUOTE

219

s1 <- if BACKSLASH `B.elem` s

220

then case Z.parse unescape s of

221

Right r -> return r

222

Left err -> fail err

223

else return s

224

225

case decodeUtf8' s1 of

226

Right r -> return r

227

Left err -> fail $ show err

228

229

{-# INLINE jstring_ #-}

230

231

unescape :: Z.Parser ByteString

232

unescape = toByteString <$> go mempty where

233

go acc = do

234

h <- Z.takeWhile (/=BACKSLASH)

235

let rest = do

236

start <- Z.take 2

237

let !slash = B.unsafeHead start

238

!t = B.unsafeIndex start 1

239

escape = case B.findIndex (==t) "\"\\/ntbrfu" of

240

Just i -> i

241

_ -> 255

242

if slash /= BACKSLASH || escape == 255

243

then fail "invalid JSON escape sequence"

244

else do

245

let cont m = go (acc `mappend` byteString h `mappend` m)

246

{-# INLINE cont #-}

247

if t /= 117 -- 'u'

248

then cont (word8 (B.unsafeIndex mapping escape))

249

else do

250

a <- hexQuad

251

if a < 0xd800 || a > 0xdfff

252

then cont (charUtf8 (chr a))

253

else do

254

b <- Z.string "\\u" *> hexQuad

255

if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff

256

then let !c = ((a - 0xd800) `shiftL` 10) +

257

(b - 0xdc00) + 0x10000

258

in cont (charUtf8 (chr c))

259

else fail "invalid UTF-16 surrogates"

260

done <- Z.atEnd

261

if done

262

then return (acc `mappend` byteString h)

263

else rest

264

mapping = "\"\\/\n\t\b\r\f"

265

266

hexQuad :: Z.Parser Int

267

hexQuad = do

268

s <- Z.take 4

269

let hex n | w >= C_0 && w <= C_9 = w - C_0

270

| w >= C_a && w <= C_f = w - 87

271

| w >= C_A && w <= C_F = w - 55

272

| otherwise = 255

273

where w = fromIntegral $ B.unsafeIndex s n

274

a = hex 0; b = hex 1; c = hex 2; d = hex 3

275

if (a .|. b .|. c .|. d) /= 255

276

then return $! d .|. (c `shiftL` 4) .|. (b `shiftL` 8) .|. (a `shiftL` 12)

277

else fail "invalid hex escape"

278

279

decodeWith :: Parser Value -> (Value -> Result a) -> L.ByteString -> Maybe a

280

decodeWith p to s =

281

case L.parse p s of

282

L.Done _ v -> case to v of

283

Success a -> Just a

284

_ -> Nothing

285

_ -> Nothing

286

{-# INLINE decodeWith #-}

287

288

decodeStrictWith :: Parser Value -> (Value -> Result a) -> B.ByteString

289

-> Maybe a

290

decodeStrictWith p to s =

291

case either Error to (A.parseOnly p s) of

292

Success a -> Just a

293

Error _ -> Nothing

294

{-# INLINE decodeStrictWith #-}

295

296

eitherDecodeWith :: Parser Value -> (Value -> Result a) -> L.ByteString

297

-> Either String a

298

eitherDecodeWith p to s =

299

case L.parse p s of

300

L.Done _ v -> case to v of

301

Success a -> Right a

302

Error msg -> Left msg

303

L.Fail _ _ msg -> Left msg

304

{-# INLINE eitherDecodeWith #-}

305

306

eitherDecodeStrictWith :: Parser Value -> (Value -> Result a) -> B.ByteString

307

-> Either String a

308

eitherDecodeStrictWith p to s =

309

case either Error to (A.parseOnly p s) of

310

Success a -> Right a

311

Error msg -> Left msg

312

{-# INLINE eitherDecodeStrictWith #-}

313

314

-- $lazy

315

316

-- The 'json' and 'value' parsers decouple identification from

317

-- conversion. Identification occurs immediately (so that an invalid

318

-- JSON document can be rejected as early as possible), but conversion

319

-- to a Haskell value is deferred until that value is needed.

320

321

-- This decoupling can be time-efficient if only a smallish subset of

322

-- elements in a JSON value need to be inspected, since the cost of

323

-- conversion is zero for uninspected elements. The trade off is an

324

-- increase in memory usage, due to allocation of thunks for values

325

-- that have not yet been converted.

326

327

-- $strict

328

329

-- The 'json'' and 'value'' parsers combine identification with

330

-- conversion. They consume more CPU cycles up front, but have a

331

-- smaller memory footprint.

332

333

-- | Parse a top-level JSON value followed by optional whitespace and

334

-- end-of-input. See also: 'json'.

335

jsonEOF :: Parser Value

336

jsonEOF = json <* skipSpace <* endOfInput

337

338

-- | Parse a top-level JSON value followed by optional whitespace and

339

-- end-of-input. See also: 'json''.

340

jsonEOF' :: Parser Value

341

jsonEOF' = json' <* skipSpace <* endOfInput

342

343

toByteString :: Builder -> ByteString

344

toByteString = L.toStrict . toLazyByteString

345

{-# INLINE toByteString #-}

346

347

aeson :: IO [Benchmark]

348

aeson = do

349

path <- pathTo "json-data"

350

names <- sort . filter (`notElem` [".", ".."]) <$> getDirectoryContents path

351

forM names $ \name -> do

352

bs <- B.readFile (path </> name)

353

return . bench ("attoparsec/" ++ dropExtension name) $ nf (A.parseOnly jsonEOF') bs

354

355

aesonLazy :: IO [Benchmark]

356

aesonLazy = do

357

path <- pathTo "json-data"

358

names <- sort . filter (`notElem` [".", ".."]) <$> getDirectoryContents path

359

forM names $ \name -> do

360

bs <- L.readFile (path </> name)

361

return . bench ("attoparsec/lazy-bytestring/" ++ dropExtension name) $ nf (L.parse jsonEOF') bs

Older »