1
module UU.Parsing.Machine where
2
import UU.Util.BinaryTrees
3
import UU.Parsing.MachineInterface
8
-- ==========================================================================================
9
-- ===== BASIC PARSER TYPE =================================================================
10
-- =======================================================================================
12
newtype RealParser state s p a = P(forall r' r'' . (a -> r'' -> r') ->
13
(state -> Steps r'' s p) -> state -> Steps r' s p)
15
newtype RealRecogn state s p = R(forall r . (state -> Steps r s p) -> state -> Steps r s p)
17
newtype RealAccept state result s p a = A(forall r . (state -> Steps r s p) -> state -> Steps (result a r) s p)
19
newtype ParsRec state result s p a = PR ( RealParser state s p a
20
, RealRecogn state s p
21
, RealAccept state result s p a
24
mkPR (P p, R r) = PR (P p, R r, A (p acceptR))
31
parseRecbasic :: (inp -> Steps (out c d) sym pos)
32
-> ParsRec inp out sym pos a
34
-> Steps (out a (out c d)) sym pos
35
parseRecbasic eof (PR ( P rp, rr, A ra)) inp = (ra eof inp)
37
parsebasic :: (inp -> Steps (out c d) sym pos)
38
-> AnaParser inp out sym pos a
40
-> Steps (out a (out c d)) sym pos
41
parsebasic eof (pp) inp
42
= parseRecbasic eof (pars pp) inp
44
-- =======================================================================================
45
-- ===== CORE PARSERS ====================================================================
46
-- =======================================================================================
47
libAccept :: (OutputState a, InputState b s p) => ParsRec b a s p s
48
libAccept = mkPR (P (\ acc k state ->
49
case splitState state of
50
({-#L-} s, ss {-L#-}) -> OkVal (acc s) (k ss))
52
case splitState state of
53
({-#L-} s, ss {-L#-}) -> Ok (k ss))
55
libInsert c sym firsts =mkPR( P (\acc k state -> let msg = Msg firsts
58
in StRepair c msg (val (acc sym) (k (reportError msg state))))
59
, R (\ k state -> let msg = Msg firsts
62
in StRepair c msg (k (reportError msg state)))
65
{-# INLINE libSeqL #-}
66
{-# INLINE libSeqR #-}
67
{-# INLINE libDollar #-}
68
{-# INLINE libDollarL #-}
69
{-# INLINE libDollarR #-}
70
{-# INLINE libSucceed #-}
72
libSucceed v =mkPR( P (\ acc -> let accv = val (acc v) in {-# SCC "machine" #-} \ k state -> accv (k state))
75
libSeq (PR (P pp, R pr, _)) ~(PR (P qp, R qr, A qa)) =mkPR ( P (\ acc -> let p = pp (nextR acc) in {-# SCC "machine" #-} \k state -> p (qa k) state)
78
libDollar f (PR (P qp, R qr, _ )) = mkPR ( P (\ acc -> {-# SCC "machine" #-} qp (acc.f))
81
libDollarL f (PR (P qp, R qr, _ )) = mkPR ( P (\ acc -> let accf = val (acc f) in {-# SCC "machine" #-} \ k state -> qr (\ inp -> accf ( k inp)) state)
84
libDollarR f (PR (P qp, R qr, _ )) = mkPR (P qp, R qr)
86
libSeqL (PR (P pp, R pr, _ )) ~(PR (P qp, R qr , _ )) = mkPR ( P (\acc -> let p = pp acc in {-# SCC "machine" #-}\k state -> p (qr k) state)
89
libSeqR (PR (P pp, R pr, _ )) ~(PR (P qp, R qr, _ )) = mkPR ( P (\acc -> let q = qp acc in {-# SCC "machine" #-}\k state -> pr (q k) state)
92
libOr (PR (P pp, R pr,_ )) (PR (P qp, R qr, _ )) = mkPR ( P (\ acc -> let p = pp acc
94
in {-# SCC "machine" #-} \ k state -> p k state `libBest` q k state)
95
, R (\ k state -> pr k state `libBest` qr k state)
97
libFail :: OutputState a => ParsRec b a c p d
98
libFail = mkPR ( P (\ _ _ _ -> (usererror "calling an always failing parser" ))
99
, R (\ _ _ -> (usererror "calling an always failing recogniser"))
104
starting :: Steps a s p -> Expecting s
105
starting (StRepair _ m _ ) = getStart m
106
starting (Best l _ _ ) = starting l
107
starting _ = systemerror "UU.Parsing.Machine" "starting"
109
{-# INLINE hasSuccess #-}
110
hasSuccess :: Steps a s p -> Bool
111
hasSuccess (StRepair _ _ _ ) = False
112
hasSuccess (Best _ _ _ ) = False
115
getStart (Msg st _ _) = st
117
addToMessage (Msg exp pos act) more = Msg (more `eor` exp) pos act
120
addexpecting more (StRepair cost msg rest) = StRepair cost (addToMessage msg more) rest
121
addexpecting more (Best l sel r) = Best (addexpecting more l)
122
(addexpecting more sel)
123
(addexpecting more r)
124
addexpecting more (OkVal v rest ) = systemerror "UU_Parsing" ("addexpecting: OkVal")
125
addexpecting more (Ok _ ) = systemerror "UU_Parsing" ("addexpecting: Ok")
126
addexpecting more (Cost _ _ ) = systemerror "UU_Parsing" ("addexpecting: Cost")
127
addexpecting more _ = systemerror "UU_Parsing" ("addexpecting: other")
130
eor :: Ord a => Expecting a -> Expecting a -> Expecting a
131
eor p q = EOr (merge (tolist p) (tolist q))
132
where merge x@(l:ll) y@(r:rr) = case compare l r of
133
LT -> l:( ll `merge` y)
134
GT -> r:( x `merge` rr)
135
EQ -> l:( ll `merge` rr)
141
-- =======================================================================================
142
-- ===== SELECTING THE BEST RESULT ======================================================
143
-- =======================================================================================
144
-- INV: the first argument should be the shorter insertion
145
libBest :: Ord s => Steps b s p -> Steps b s p -> Steps b s p
146
libBest ls rs = libBest' ls rs id id
148
libBest' :: Ord s => Steps b s p -> Steps c s p -> (b -> d) -> (c -> d) -> Steps d s p
149
libBest' (OkVal v ls) (OkVal w rs) lf rf = Ok (libBest' ls rs (lf.v) (rf.w))
150
libBest' (OkVal v ls) (Ok rs) lf rf = Ok (libBest' ls rs (lf.v) rf )
151
libBest' (Ok ls) (OkVal w rs) lf rf = Ok (libBest' ls rs lf (rf.w))
152
libBest' (Ok ls) (Ok rs) lf rf = Ok (libBest' ls rs lf rf )
153
libBest' (OkVal v ls) _ lf rf = OkVal (lf.v) ls
154
libBest' _ (OkVal w rs) lf rf = OkVal (rf.w) rs
155
libBest' (Ok ls) _ lf rf = OkVal lf ls
156
libBest' _ (Ok rs) lf rf = OkVal rf rs
157
libBest' l@(Cost i ls ) r@(Cost j rs ) lf rf
158
| i =={-#L-} j = Cost i (libBest' ls rs lf rf)
159
| i <{-#L-} j = Cost i (val lf ls)
160
| i >{-#L-} j = Cost j (val rf rs)
161
libBest' l@(Cost i ls) _ lf rf = Cost i (val lf ls)
162
libBest' _ r@(Cost j rs) lf rf = Cost j (val rf rs)
163
libBest' l@(NoMoreSteps v) _ lf rf = NoMoreSteps (lf v)
164
libBest' _ r@(NoMoreSteps w) lf rf = NoMoreSteps (rf w)
165
libBest' l r lf rf = libCorrect l r lf rf
167
lib_correct :: Ord s => (b -> c -> Steps d s p) -> (b -> c -> Steps d s p) -> b -> c -> Steps d s p
168
lib_correct p q = \k inp -> libCorrect (p k inp) ( q k inp) id id
170
libCorrect :: Ord s => Steps a s p -> Steps c s p -> (a -> d) -> (c -> d) -> Steps d s p
171
libCorrect ls rs lf rf
172
= let (ToBeat _ choice) = traverse
173
(traverse (ToBeat 999{-#L-} (val lf newleft))
174
(val lf, newleft, 0{-#L-}) 4{-#L-})
175
(val rf, newright, 0{-#L-}) 4{-#L-}
176
newleft = addexpecting (starting rs) ls
177
newright = addexpecting (starting ls) rs
178
in Best (val lf newleft)
182
data ToBeat a = ToBeat Int{-#L-} a
184
traverse :: ToBeat (Steps a s p) -> (Steps v s p -> Steps a s p, Steps v s p, Int{-L#-}) -> Int{-L#-} -> ToBeat (Steps a s p)
185
traverse b@(ToBeat bv br) (f, s, v) 0{-#L-} = {- trace ("comparing " ++ show bv ++ " with " ++ show v ++ "\n") $ -}
189
traverse b@(ToBeat bv br) (f, Ok l, v) n = {- trace ("adding" ++ show n ++ "\n") $-} traverse b (f.Ok , l, v - n + 4) (n -{-#L-} 1{-#L-})
190
traverse b@(ToBeat bv br) (f, OkVal w l, v) n = {- trace ("adding" ++ show n ++ "\n") $-} traverse b (f.OkVal w, l, v - n + 4) (n -{-#L-} 1{-#L-})
191
traverse b@(ToBeat bv br) (f, Cost i l, v) n = if i +{-#L-} v >={-#L-} bv
193
else traverse b (f.Cost i, l, i +{-#L-} v) n
194
traverse b@(ToBeat bv br) (f, Best l _ r, v) n = traverse (traverse b (f, l, v) n) (f, r, v) n
195
traverse b@(ToBeat bv br) (f, StRepair i msgs r, v) n = if i +{-#L-} v >={-#L-} bv then b
196
else traverse b (f.StRepair i msgs, r, i +{-#L-} v) (n -{-#L-} 1{-#L-})
197
traverse b@(ToBeat bv br) (f, t@(NoMoreSteps _), v) n = if bv <={-#L-} v then b else ToBeat v (f t)
198
-- =======================================================================================
199
-- ===== DESCRIPTORS =====================================================================
200
-- =======================================================================================
201
data AnaParser state result s p a
202
= AnaParser { pars :: ParsRec state result s p a
204
, zerop :: Maybe (Bool, Either a (ParsRec state result s p a))
205
, onep :: OneDescr state result s p a
207
data OneDescr state result s p a
208
= OneDescr { firsts :: Expecting s
209
, table :: [(SymbolR s, TableEntry state result s p a)]
212
data TableEntry state result s p a = TableEntry (ParsRec state result s p a) (Expecting s -> ParsRec state result s p a)
213
-- =======================================================================================
214
-- ===== ANALYSING COMBINATORS ===========================================================
215
-- =======================================================================================
216
anaFail :: OutputState a => AnaParser b a c p d
217
anaFail = AnaParser { pars = libFail
222
noOneParser = OneDescr (EOr []) []
224
pEmpty p zp = AnaParser { pars = p
230
anaSucceed v = pEmpty (libSucceed v) (False, Left v)
231
anaLow v = pEmpty (libSucceed v) (True, Left v)
232
anaDynE p = pEmpty p (False, Right p)
233
anaDynL p = pEmpty p (True , Right p)
234
--anaDynN fi len range p = mkParser Nothing (OneDescr len fi [(range, p)])
236
anaOr ld@(AnaParser _ ll zl ol) rd@(AnaParser _ lr zr or)
237
= mkParser newlength newZeroDescr newOneDescr
238
where (newlength, maybeswap) = ll `nat_min` lr
239
newZeroDescr = case zl of {Nothing -> zr
240
;_ -> case zr of {Nothing -> zl
241
;_ -> usererror ("Two empty alternatives")
243
newOneDescr = maybeswap orOneOneDescr ol or False
245
{-# INLINE anaSeq #-}
247
anaSeq libdollar libseq comb (AnaParser pl ll zl ol) ~rd@(AnaParser pr lr zr or)
249
Just (b, zp ) -> let newZeroDescr = seqZeroZero zl zr libdollar libseq comb
250
newOneDescr = let newOneOne = mapOnePars ( `libseq` pr) ol
251
newZeroOne = case zp of
252
Left f -> mapOnePars (f `libdollar` ) or
253
Right p -> mapOnePars (p `libseq` ) or
254
in orOneOneDescr newZeroOne newOneOne b -- left one is shortest
255
in mkParser lr newZeroDescr newOneDescr
256
_ -> AnaParser (pl `libseq` pr) (ll `nat_add` lr) Nothing (mapOnePars (`libseq` pr) ol)
258
seqZeroZero Nothing _ _ _ _ = Nothing
259
seqZeroZero _ Nothing _ _ _ = Nothing
260
seqZeroZero (Just (llow, left)) (Just (rlow, right)) libdollar libseq comb
261
= Just ( llow || rlow
263
Left lv -> case right of
264
Left rv -> Left (comb lv rv)
265
Right rp -> Right (lv `libdollar` rp)
266
Right lp -> case right of
267
Left rv -> Right (lp `libseq` libSucceed rv)
268
Right rp -> Right (lp `libseq` rp)
271
orOneOneDescr ~(OneDescr fl tl) ~(OneDescr fr tr) b
272
= let keystr = map fst tr
273
lefttab = if b then [r | r@(k,_) <- tl, not (k `elem` keystr)] else tl
274
in OneDescr (fl `eor` fr) (lefttab ++ tr)
276
anaCostRange _ _ EmptyR = anaFail
277
anaCostRange ins_cost ins_sym range
278
= mkParser (Succ Zero) Nothing ( OneDescr (ESym range) [(range, TableEntry libAccept
279
(libInsert ins_cost ins_sym)
282
--anaCostSym i ins sym = pCostRange i ins (Range sym sym)
284
anaGetFirsts (AnaParser p l z od) = firsts od
286
anaSetFirsts newexp (AnaParser _ l zd od)
287
= mkParser l zd (od{firsts = newexp })
289
-- =======================================================================================
290
-- ===== UTILITIES ========================================================================
291
-- =======================================================================================
292
mapOnePars fp ~(OneDescr fi t) = OneDescr fi [ (k, TableEntry (fp p) (fp.corr))
293
| (k, TableEntry p corr ) <- t
296
-- =======================================================================================
297
-- ===== MKPARSER ========================================================================
298
-- =======================================================================================
299
mkParser length zd ~descr@(OneDescr firsts tab) -- pattern matching should be lazy for lazy computation of length for empty parsers
300
= let parstab = foldr1 mergeTables [[(k, p)]| (k, TableEntry p _) <- tab]
302
= let ptab = [(k, (getp pr) )| (k, pr) <- parstab]
304
[(s1, p1)] -> ({-# SCC "Locating" #-}\ s -> if r1 s then Just p1 else Nothing )
305
where r1 = symInRange s1
306
[(s1, p1), (s2, p2)] -> ({-# SCC "Locating" #-} \ s -> if r1 s then Just p1 else
307
if r2 s then Just p2 else Nothing)
308
where r1 = symInRange s1
310
[(s1, p1), (s2, p2), (s3, p3)] -> ({-# SCC "Locating" #-}\ s -> if r1 s then Just p1 else
311
if r2 s then Just p2 else
312
if r3 s then Just p3 else Nothing)
313
where r1 = symInRange s1
316
_ -> lookupSym (tab2tree ptab)
317
zerop = getp (case zd of
319
Just (_, Left v) -> libSucceed v
320
Just (_, Right p) -> p
322
-- SDS/AD 20050603: only the shortest alternative in possible corrections now is taken
323
-- insertsyms = foldr1 lib_correct [ getp (pr firsts)| (_ , TableEntry _ pr) <- tab ]
324
insertsyms = head [ getp (pr firsts)| (_ , TableEntry _ pr) <- tab ]
326
= case splitState inp of
327
({-#L-} s, ss {-L#-}) -> let { msg = Msg firsts (getPosition inp) (Delete s)
328
; newinp = reportError msg ss
330
in libCorrect (StRepair (deleteCost s) msg (result k newinp))
331
(insertsyms k inp) id id
332
result = if null tab then zerop
334
Nothing ->({-# SCC "mkParser1" #-}\k inp ->
335
case splitStateE inp of
336
Left' s ss -> case find s of
338
Nothing -> correct k inp
339
Right' ss -> insertsyms k ss)
340
Just (True, _) ->({-# SCC "mkParser2" #-}\k inp ->
341
case splitStateE inp of
342
Left' s ss -> case find s of
344
Nothing -> let r = zerop k inp
345
in if hasSuccess r then r else libCorrect r (correct k inp) id id
346
Right' ss -> zerop k ss)
347
Just (False, _) ->({-# SCC "mkParser3" #-}\k inp ->
348
case splitStateE inp of
349
Left' s ss -> case find s of
350
Just p -> p k inp `libBest` zerop k inp
351
Nothing -> let r = zerop k inp
352
in if hasSuccess r then r else libCorrect r (correct k inp) id id
353
Right' ss -> zerop k ss)
355
res = mkPR (P ( \ acc -> mkactualparser (\ (PR (P p, _ , _)) -> p acc))
356
,R ( mkactualparser (\ (PR (_ , R p, _)) -> p ))
358
in AnaParser res length zd descr
360
-- =======================================================================================
361
-- ===== MINIMAL LENGTHS (lazily formulated) =============================================
362
-- =======================================================================================
369
nat_le _ Zero = False
370
nat_le Infinite _ = False
371
nat_le _ Infinite = True
372
nat_le (Succ l) (Succ r) = nat_le l r
374
nat_min Infinite r = (r, flip)
375
nat_min l Infinite = (l, id)
376
nat_min Zero _ = (Zero, id)
377
nat_min _ Zero = (Zero, flip)
378
nat_min (Succ ll) (Succ rr) = let (v, fl) = ll `nat_min` rr in (Succ v, fl)
380
nat_add Infinite _ = Infinite
382
nat_add (Succ l) r = Succ (nat_add l r)
383
-- =======================================================================================
384
-- ===== CHOICE STRUCTURES =============================================================
385
-- =======================================================================================
388
mergeTables lss@(l@(le@(Range a b),ct ):ls) rss@(r@(re@(Range c d),ct'):rs)
389
= let ct'' = ct `libOr` ct'
390
in if c<a then mergeTables rss lss -- swap
391
else if b<c then l:mergeTables ls rss -- disjoint case
392
else if a<c then (Range a (symBefore c),ct) :mergeTables ((Range c b,ct):ls) rss
393
else if b<d then (Range a b,ct'') :mergeTables ((Range (symAfter b) d,ct'):rs) ls
394
else if b>d then mergeTables rss lss
395
else (le,ct'') : mergeTables ls rs-- equals
397
-- =======================================================================================
398
-- ===== WRAPPING AND MAPPING ==============================================================
399
-- =======================================================================================
401
libMap :: OutputState result =>
402
(forall r r'' . (b -> r -> r'') -> state -> Steps (a, r) s p -> ( state, Steps r'' s p))
403
-> (forall r . state -> Steps ( r) s p -> ( state, Steps r s p))
404
-> ParsRec state result s p a -> ParsRec state result s p b
405
libMap f f' (PR (P p, R r, _)) = mkPR ( P(\acc -> let pp = p (,)
407
in \ k instate -> let inresult = pp k outstate
408
(outstate, outresult) = facc instate inresult
411
, R(\ k instate -> let inresult = r k outstate
412
(outstate, outresult) = f' instate inresult
416
pMap :: OutputState result =>
417
(forall r r'' . (b -> r -> r'') -> state -> Steps (a, r) s p -> ( state, Steps r'' s p))
418
-> (forall r . state -> Steps ( r) s p -> ( state, Steps r s p))
419
-> AnaParser state result s p a -> AnaParser state result s p b
421
pMap f f' (AnaParser p l z o) = AnaParser (libMap f f' p)
425
Just (b, v) -> Just (b, case v of
426
Left w -> Right (libMap f f' (libSucceed w))
427
Right pp -> Right (libMap f f' pp)))
428
(mapOnePars (libMap f f') o)
431
libWrap :: OutputState result =>
432
(forall r r'' . (b -> r -> r'')
435
-> (state -> Steps r s p)
436
-> (state, Steps r'' s p, state -> Steps r s p))
439
-> (state -> Steps r s p)
440
-> (state, Steps r s p, state -> Steps r s p))
441
-> ParsRec state result s p a -> ParsRec state result s p b
442
libWrap f f' (PR (P p, R r, _)) = mkPR ( P(\ acc -> let pp = p (,)
444
in \ k instate -> let (stl, ar, str2rr) = facc instate rl k
448
, R(\ k instate -> let (stl, ar, str2rr) = f' instate rl k
453
pWrap :: OutputState result
454
=> (forall r r'' . (b -> r -> r'')
457
-> (state -> Steps r s p)
458
-> (state, Steps r'' s p, state -> Steps r s p))
461
-> (state -> Steps r s p)
462
-> (state, Steps r s p, state -> Steps r s p))
463
-> AnaParser state result s p a -> AnaParser state result s p b
465
pWrap f f' (AnaParser p l z o) = AnaParser (libWrap f f' p)
469
Just (b, v) -> Just (b, case v of
470
Left w -> Right (libWrap f f' (libSucceed w))
471
Right pp -> Right (libWrap f f' pp)))
472
(mapOnePars (libWrap f f') o)
476
-- =======================================================================================
477
-- ===== BINARY SEARCH TREES =============================================================
478
-- =======================================================================================
480
lookupSym :: Ord a => BinSearchTree (SymbolR a, b) -> a -> Maybe b
481
lookupSym = btFind symRS