1
-----------------------------------------------------------------------------
2
Generation of LALR parsing tables.
4
(c) 1993-1996 Andy Gill, Simon Marlow
5
(c) 1997-2001 Simon Marlow
6
-----------------------------------------------------------------------------
9
> (genActionTable, genGotoTable, genLR0items, precalcClosure0,
10
> propLookaheads, calcLookaheads, mergeLookaheadInfo, countConflicts,
16
> import qualified Set hiding ( Set )
19
> import Control.Monad.ST
20
> import Data.Array.ST
21
> import Data.Array as Array
22
> import Data.List (nub)
24
> unionMap :: (Ord b) => (a -> Set b) -> Set a -> Set b
25
> unionMap f = Set.fold (Set.union . f) Set.empty
27
This means rule $a$, with dot at $b$ (all starting at 0)
29
> type Lr0Item = (Int,Int) -- (rule, dot)
31
> type Lr1Item = (Int,Int,Set Name) -- (rule, dot, lookahead)
32
> type RuleList = [Lr0Item]
34
-----------------------------------------------------------------------------
35
Generating the closure of a set of LR(0) items
37
Precalculate the rule closure for each non-terminal in the grammar,
38
using a memo table so that no work is repeated.
40
> precalcClosure0 :: Grammar -> Name -> RuleList
42
> \n -> case lookup n info' of
47
> info' :: [(Name, RuleList)]
48
> info' = map (\(n,rules) -> (n,map (\rule -> (rule,0)) (Set.toAscList rules))) info
50
> info :: [(Name, Set Int)]
51
> info = mkClosure (==) (\f -> map (follow f) f)
52
> (map (\nt -> (nt,Set.fromList (lookupProdsOfName g nt))) nts)
54
> follow :: [(Name, Set Int)] -> (Name, Set Int) -> (Name, Set Int)
55
> follow f (nt,rules) = (nt, unionMap (followNT f) rules `Set.union` rules)
57
> followNT :: [(Name, Set Int)] -> Int -> Set Int
59
> case findRule g rule 0 of
60
> Just nt | nt >= firstStartTok && nt < fst_term ->
63
> Nothing -> error "followNT"
66
> nts = non_terminals g
67
> fst_term = first_term g
69
> closure0 :: Grammar -> (Name -> RuleList) -> Set Lr0Item -> Set Lr0Item
70
> closure0 g closureOfNT set = Set.fold addRules Set.empty set
72
> fst_term = first_term g
73
> addRules rule set = Set.union (Set.fromList (rule : closureOfRule rule)) set
75
> closureOfRule (rule,dot) =
76
> case findRule g rule dot of
77
> (Just nt) | nt >= firstStartTok && nt < fst_term
81
-----------------------------------------------------------------------------
82
Generating the closure of a set of LR(1) items
84
> closure1 :: Grammar -> ([Name] -> Set Name) -> [Lr1Item] -> [Lr1Item]
85
> closure1 g first set
86
> = fst (mkClosure (\(_,new) _ -> null new) addItems ([],set))
88
> fst_term = first_term g
90
> addItems :: ([Lr1Item],[Lr1Item]) -> ([Lr1Item],[Lr1Item])
91
> addItems (old_items, new_items) = (new_old_items, new_new_items)
93
> new_old_items = new_items `union_items` old_items
94
> new_new_items = subtract_items
95
> (foldr union_items [] (map fn new_items))
98
> fn :: Lr1Item -> [Lr1Item]
100
> case lookupProdNo g rule of { (name,lhs,_,_) ->
101
> case drop dot lhs of
102
> (b:beta) | b >= firstStartTok && b < fst_term ->
103
> let terms = unionMap
104
> (\a -> first (beta ++ [a])) as
106
> [ (rule,0,terms) | rule <- lookupProdsOfName g b ]
110
Subtract the first set of items from the second.
112
> subtract_items :: [Lr1Item] -> [Lr1Item] -> [Lr1Item]
113
> subtract_items items1 items2 = foldr (subtract_item items2) [] items1
115
These utilities over item sets are crucial to performance.
117
Stamp on overloading with judicious use of type signatures...
119
> subtract_item :: [Lr1Item] -> Lr1Item -> [Lr1Item] -> [Lr1Item]
120
> subtract_item [] i result = i : result
121
> subtract_item ((rule,dot,as):items) i@(rule',dot',as') result =
122
> case compare rule' rule of
125
> EQ -> case compare dot' dot of
128
> EQ -> case Set.difference as' as of
129
> bs | Set.null bs -> result
130
> | otherwise -> (rule,dot,bs) : result
132
> carry_on = subtract_item items i result
134
Union two sets of items.
136
> union_items :: [Lr1Item] -> [Lr1Item] -> [Lr1Item]
137
> union_items is [] = is
138
> union_items [] is = is
139
> union_items (i@(rule,dot,as):is) (i'@(rule',dot',as'):is') =
140
> case compare rule rule' of
143
> EQ -> case compare dot dot' of
146
> EQ -> (rule,dot,as `Set.union` as') : union_items is is'
148
> drop_i = i : union_items is (i':is')
149
> drop_i' = i' : union_items (i:is) is'
151
-----------------------------------------------------------------------------
154
The input should be the closure of a set of kernel items I together with
155
a token X (terminal or non-terminal. Output will be the set of kernel
156
items for the set of items goto(I,X)
158
> gotoClosure :: Grammar -> Set Lr0Item -> Name -> Set Lr0Item
159
> gotoClosure gram i x = unionMap fn i
162
> case findRule gram rule_no dot of
163
> Just t | x == t -> Set.singleton (rule_no,dot+1)
166
-----------------------------------------------------------------------------
167
Generating LR0 Item sets
169
The item sets are generated in much the same way as we find the
170
closure of a set of items: we use two sets, those which have already
171
generated more sets, and those which have just been generated. We
172
keep iterating until the second set is empty.
174
The addItems function is complicated by the fact that we need to keep
175
information about which sets were generated by which others.
177
> type ItemSetWithGotos = (Set Lr0Item, [(Name,Int)])
179
> genLR0items :: Grammar -> (Name -> RuleList) -> [ItemSetWithGotos]
180
> genLR0items g precalcClosures
181
> = fst (mkClosure (\(old,new) _ -> null new)
186
> n_starts = length (starts g)
187
> startRules :: [Set Lr0Item]
188
> startRules = [ Set.singleton (rule,0) | rule <- [0..n_starts] ]
190
> tokens = non_terminals g ++ terminals g
192
> addItems :: ([ItemSetWithGotos], [Set Lr0Item])
193
> -> ([ItemSetWithGotos], [Set Lr0Item])
195
> addItems (oldSets,newSets) = (newOldSets, reverse newNewSets)
198
> newOldSets = oldSets ++ (zip newSets intgotos)
200
> itemSets = map fst oldSets ++ newSets
202
First thing to do is for each set in I in newSets, generate goto(I,X)
203
for each token (terminals and nonterminals) X.
205
> gotos :: [[(Name,Set Lr0Item)]]
206
> gotos = map (filter (not . Set.null . snd))
207
> (map (\i -> let i' = closure0 g precalcClosures i in
208
> [ (x,gotoClosure g i' x) | x <- tokens ]) newSets)
210
Next, we assign each new set a number, which is the index of this set
211
in the list of sets comprising all the sets generated so far plus
212
those generated in this iteration. We also filter out those sets that
213
are new, i.e. don't exist in the current list of sets, so that they
216
We also have to make sure that there are no duplicate sets in the
217
*current* batch of goto(I,X) sets, as this could be disastrous. I
218
think I've squished this one with the '++ reverse newSets' in
221
numberSets is built this way so we can use it quite neatly with a foldr.
222
Unfortunately, the code's a little opaque.
225
> :: [(Name,Set Lr0Item)]
229
> -> (Int, [[(Name,Int)]], [Set Lr0Item])
231
> numberSets [] (i,gotos,newSets) = (i,([]:gotos),newSets)
232
> numberSets ((x,gotoix):rest) (i,g:gotos,newSets)
234
> (case indexInto 0 gotoix (itemSets ++ reverse newSets) of
235
> Just j -> (i, ((x,j):g):gotos, newSets)
236
> Nothing -> (i+1,((x,i):g):gotos, gotoix:newSets))
238
Finally, do some fiddling around to get this all in the form we want.
240
> intgotos :: [[(Name,Int)]]
241
> newNewSets :: [Set Lr0Item]
242
> (_, ([]:intgotos), newNewSets) =
243
> foldr numberSets (length newOldSets, [[]], []) gotos
245
> indexInto :: Eq a => Int -> a -> [a] -> Maybe Int
246
> indexInto _ _ [] = Nothing
247
> indexInto i x (y:ys) | x == y = Just i
248
> | otherwise = indexInto (i+1) x ys
250
-----------------------------------------------------------------------------
251
Computing propagation of lookaheads
253
ToDo: generate this info into an array to be used in the subsequent
258
> -> [(Set Lr0Item,[(Name,Int)])] -- LR(0) kernel sets
259
> -> ([Name] -> Set Name) -- First function
261
> [(Int, Lr0Item, Set Name)], -- spontaneous lookaheads
262
> Array Int [(Lr0Item, Int, Lr0Item)] -- propagated lookaheads
265
> propLookaheads gram sets first = (concat s, array (0,length sets - 1)
266
> [ (a,b) | (a,b) <- p ])
269
> (s,p) = unzip (zipWith propLASet sets [0..])
271
> propLASet :: (Set Lr0Item, [(Name, Int)]) -> Int -> ([(Int, Lr0Item, Set Name)],(Int,[(Lr0Item, Int, Lr0Item)]))
272
> propLASet (set,goto) i = (start_spont ++ concat s, (i, concat p))
275
> (s,p) = unzip (map propLAItem (Set.toAscList set))
277
> -- spontaneous EOF lookaheads for each start state & rule...
278
> start_info :: [(String, Name, Name, Bool)]
279
> start_info = starts gram
281
> start_spont :: [(Int, Lr0Item ,Set Name)]
282
> start_spont = [ (start, (start,0),
283
> Set.singleton (startLookahead gram partial))
284
> | (start, (_,_,_,partial)) <-
285
> zip [ 0 .. length start_info - 1] start_info]
287
> propLAItem :: Lr0Item -> ([(Int, Lr0Item, Set Name)], [(Lr0Item, Int, Lr0Item)])
288
> propLAItem item@(rule,dot) = (spontaneous, propagated)
291
> j = closure1 gram first [(rule,dot,Set.singleton dummyTok)]
293
> spontaneous :: [(Int, Lr0Item, Set Name)]
294
> spontaneous = concat [
295
> (case findRule gram rule dot of
297
> Just x -> case lookup x goto of
298
> Nothing -> error "spontaneous"
300
> case Set.filter (/= dummyTok) ts of
301
> ts' | Set.null ts' -> []
302
> | otherwise -> [(k, (rule, dot+1), ts')])
303
> | (rule,dot,ts) <- j ]
305
> propagated :: [(Lr0Item, Int, Lr0Item)]
306
> propagated = concat [
307
> (case findRule gram rule dot of
309
> Just x -> case lookup x goto of
310
> Nothing -> error "propagated"
311
> Just k -> [(item, k, (rule, dot+1))])
312
> | (rule,dot,ts) <- j, dummyTok `elem` (Set.toAscList ts) ]
314
The lookahead for a start rule depends on whether it was declared
315
with %name or %partial: a %name parser is assumed to parse the whole
316
input, ending with EOF, whereas a %partial parser may parse only a
317
part of the input: it accepts when the error token is found.
319
> startLookahead :: Grammar -> Bool -> Name
320
> startLookahead gram partial = if partial then errorTok else eof_term gram
322
-----------------------------------------------------------------------------
325
Special version using a mutable array:
328
> :: Int -- number of states
329
> -> [(Int, Lr0Item, Set Name)] -- spontaneous lookaheads
330
> -> Array Int [(Lr0Item, Int, Lr0Item)] -- propagated lookaheads
331
> -> Array Int [(Lr0Item, Set Name)]
333
> calcLookaheads n_states spont prop
335
> array <- newArray (0,n_states) []
336
> propagate array (foldr fold_lookahead [] spont)
341
> propagate :: STArray s Int [(Lr0Item, Set Name)]
342
> -> [(Int, Lr0Item, Set Name)] -> ST s ()
343
> propagate array [] = return ()
344
> propagate array new = do
346
> items = [ (i,item'',s) | (j,item,s) <- new,
347
> (item',i,item'') <- prop ! j,
349
> new_new <- get_new array items []
350
> add_lookaheads array new
351
> propagate array new_new
353
This function is needed to merge all the (set_no,item,name) triples
354
into (set_no, item, set name) triples. It can be removed when we get
355
the spontaneous lookaheads in the right form to begin with (ToDo).
357
> add_lookaheads array [] = return ()
358
> add_lookaheads array ((i,item,s) : lookaheads) = do
359
> las <- readArray array i
360
> writeArray array i (add_lookahead item s las)
361
> add_lookaheads array lookaheads
363
> get_new array [] new = return new
364
> get_new array (l@(i,item,s):las) new = do
365
> state_las <- readArray array i
366
> get_new array las (get_new' l state_las new)
368
> add_lookahead :: Lr0Item -> Set Name -> [(Lr0Item,Set Name)] ->
369
> [(Lr0Item,Set Name)]
370
> add_lookahead item s [] = [(item,s)]
371
> add_lookahead item s (m@(item',s') : las)
372
> | item == item' = (item, s `Set.union` s') : las
373
> | otherwise = m : add_lookahead item s las
375
> get_new' :: (Int,Lr0Item,Set Name) -> [(Lr0Item,Set Name)] ->
376
> [(Int,Lr0Item,Set Name)] -> [(Int,Lr0Item,Set Name)]
377
> get_new' l [] new = l : new
378
> get_new' l@(i,item,s) (m@(item',s') : las) new
380
> let s'' = Set.filter (\x -> not (Set.member x s')) s in
381
> if Set.null s'' then new else
386
> fold_lookahead :: (Int,Lr0Item,Set Name) -> [(Int,Lr0Item,Set Name)]
387
> -> [(Int,Lr0Item,Set Name)]
388
> fold_lookahead l [] = [l]
389
> fold_lookahead l@(i,item,s) (m@(i',item',s'):las)
390
> | i == i' && item == item' = (i,item, s `Set.union` s'):las
391
> | i < i' = (i,item,s):m:las
392
> | otherwise = m : fold_lookahead l las
397
:: Int -- number of states
398
-> [(Int, Lr0Item, Set Name)] -- spontaneous lookaheads
399
-> Array Int [(Lr0Item, Int, Lr0Item)] -- propagated lookaheads
400
-> Array Int [(Lr0Item, Set Name)]
402
calcLookaheads n_states spont prop
403
= rebuildArray $ fst (mkClosure (\(_,new) _ -> null new) propagate
404
([], foldr addLookahead [] spont))
407
rebuildArray :: [(Int, Lr0Item, Set Name)] -> Array Int [(Lr0Item, Set Name)]
408
rebuildArray xs = accumArray (++) [] (0,n_states-1)
409
[ (a, [(b,c)]) | (a,b,c) <- xs ]
411
propagate (las,new) =
413
items = [ (i,item'',s) | (j,item,s) <- new,
414
(item',i,item'') <- prop ! j,
416
new_new = foldr (\i new -> getNew i las new) [] items
417
new_las = foldr addLookahead las new
421
addLookahead :: (Int,Lr0Item,Set Name) -> [(Int,Lr0Item,Set Name)]
422
-> [(Int,Lr0Item,Set Name)]
423
addLookahead l [] = [l]
424
addLookahead l@(i,item,s) (m@(i',item',s'):las)
425
| i == i' && item == item' = (i,item, s `Set.union` s'):las
426
| i < i' = (i,item,s):m:las
427
| otherwise = m : addLookahead l las
429
getNew :: (Int,Lr0Item,Set Name) -> [(Int,Lr0Item,Set Name)]
430
-> [(Int,Lr0Item,Set Name)] -> [(Int,Lr0Item,Set Name)]
431
getNew l [] new = l:new
432
getNew l@(i,item,s) (m@(i',item',s'):las) new
433
| i == i' && item == item' =
434
let s'' = filter (`notElem` s') s in
435
if null s'' then new else
437
| i < i' = (i,item,s):new
438
| otherwise = getNew l las new
440
-----------------------------------------------------------------------------
443
Stick the lookahead info back into the state table.
446
> :: Array Int [(Lr0Item, Set Name)] -- lookahead info
447
> -> [(Set Lr0Item, [(Name,Int)])] -- state table
448
> -> [ ([Lr1Item], [(Name,Int)]) ]
450
> mergeLookaheadInfo lookaheads sets
451
> = zipWith mergeIntoSet sets [0..]
454
> mergeIntoSet :: (Set Lr0Item, [(Name, Int)]) -> Int -> ([Lr1Item], [(Name, Int)])
455
> mergeIntoSet (items, goto) i
456
> = (concat (map mergeIntoItem (Set.toAscList items)), goto)
459
> mergeIntoItem :: Lr0Item -> [Lr1Item]
460
> mergeIntoItem item@(rule,dot)
462
> where la = case [ s | (item',s) <- lookaheads ! i,
466
> _ -> error "mergIntoItem"
468
-----------------------------------------------------------------------------
469
Generate the goto table
471
This is pretty straightforward, given all the information we stored
472
while generating the LR0 sets of items.
474
Generating the goto table doesn't need lookahead info.
476
> genGotoTable :: Grammar -> [(Set Lr0Item,[(Name,Int)])] -> GotoTable
477
> genGotoTable g sets = gotoTable
479
> Grammar{ first_nonterm = fst_nonterm,
480
> first_term = fst_term,
481
> non_terminals = non_terms } = g
483
> -- goto array doesn't include %start symbols
484
> gotoTable = listArray (0,length sets-1)
486
> (array (fst_nonterm, fst_term-1) [
487
> (n, case lookup n goto of
491
> n >= fst_nonterm, n < fst_term ])
492
> | (set,goto) <- sets ]
494
-----------------------------------------------------------------------------
495
Generate the action table
497
> genActionTable :: Grammar -> ([Name] -> Set Name) ->
498
> [([Lr1Item],[(Name,Int)])] -> ActionTable
499
> genActionTable g first sets = actionTable
501
> Grammar { first_term = fst_term,
504
> priorities = prios } = g
506
> n_starts = length starts
507
> isStartRule rule = rule < n_starts -- a bit hacky, but it'll do for now
509
> term_lim = (head terms,last terms)
510
> actionTable = array (0,length sets-1)
511
> [ (set_no, accumArray res
513
> (possActions goto set))
514
> | ((set,goto),set_no) <- zip sets [0..] ]
516
> possAction goto set (rule,pos,la) =
517
> case findRule g rule pos of
518
> Just t | t >= fst_term || t == errorTok ->
519
> case lookup t goto of
522
> case lookup t prios of
523
> Nothing -> [ (t,LR'Shift j{-'-} No) ]
524
> Just p -> [ (t,LR'Shift j{-'-} p) ]
527
> -> let (_,_,_,partial) = starts !! rule in
528
> [ (startLookahead g partial, LR'Accept{-'-}) ]
530
> -> case lookupProdNo g rule of
531
> (_,_,_,p) -> zip (Set.toAscList la) (repeat (LR'Reduce rule p))
534
> possActions goto coll =
535
> (concat [ possAction goto coll item |
536
> item <- closure1 g first coll ])
538
These comments are now out of date! /JS
540
Here's how we resolve conflicts, leaving a complete record of the
541
conflicting actions in an LR'Multiple structure for later output in
544
Shift/reduce conflicts are always resolved as shift actions, and
545
reduce/reduce conflicts are resolved as a reduce action using the rule
546
with the lowest number (i.e. the rule that comes first in the grammar
549
NOTES on LR'MustFail: this was introduced as part of the precedence
550
parsing changes. The problem with LR'Fail is that it is a soft
551
failure: we sometimes substitute an LR'Fail for an LR'Reduce (eg. when
552
computing default actions), on the grounds that an LR'Fail in this
553
state will also be an LR'Fail in the goto state, so we'll fail
554
eventually. This may not be true with precedence parsing, though. If
555
there are two non-associative operators together, we must fail at this
556
point rather than reducing. Hence the use of LR'MustFail.
559
NOTE: on (LR'Multiple as a) handling
560
PCC [sep04] has changed this to have the following invariants:
561
* the winning action appears only once, in the "a" slot
562
* only reductions appear in the "as" list
563
* there are no duplications
564
This removes complications elsewhere, where LR'Multiples were
565
building up tree structures...
569
> res LR'MustFail x = LR'MustFail
570
> res x LR'MustFail = LR'MustFail
571
> res x x' | x == x' = x
572
> res (LR'Accept) _ = LR'Accept
573
> res _ (LR'Accept) = LR'Accept
575
> res a@(LR'Multiple as x) b@(LR'Multiple bs x')
576
> | x == x' = LR'Multiple (nub $ as ++ bs) x
577
> -- merge dropped reductions for identical action
579
> res a@(LR'Multiple as x) b@(LR'Multiple bs x')
582
> | a == x -> LR'Multiple (nub $ x' : as ++ bs ++ cs) x
583
> | a == x' -> LR'Multiple (nub $ x : as ++ bs ++ cs) x'
584
> | otherwise -> error "failed invariant in resolve"
585
> -- last means an unexpected change
587
> -- merge dropped reductions for clashing actions, but only
588
> -- if they were S/R or R/R
590
> res a@(LR'Multiple _ _) b = res a (LR'Multiple [] b)
591
> res a b@(LR'Multiple _ _) = res (LR'Multiple [] a) b
592
> -- leave cases above to do the appropriate merging
594
> res a@(LR'Shift s p) b@(LR'Reduce s' p') = res b a
595
> res a@(LR'Reduce s p) b@(LR'Shift s' p')
597
> (No,_) -> LR'Multiple [a] b -- shift wins
598
> (_,No) -> LR'Multiple [a] b -- shift wins
599
> (Prio c i, Prio _ j)
606
> None -> LR'MustFail
607
> res a@(LR'Reduce r p) b@(LR'Reduce r' p')
609
> (No,_) -> LR'Multiple [a] b -- give to earlier rule?
610
> (_,No) -> LR'Multiple [a] b
611
> (Prio c i, Prio _ j)
614
> | r < r' -> LR'Multiple [b] a
615
> | otherwise -> LR'Multiple [a] b
616
> res _ _ = error "confict in resolve"
618
-----------------------------------------------------------------------------
621
> countConflicts :: ActionTable -> (Array Int (Int,Int), (Int,Int))
622
> countConflicts action
623
> = (conflictArray, foldr (\(a,b) (c,d) -> (a+c, b+d)) (0,0) conflictList)
627
> conflictArray = listArray (Array.bounds action) conflictList
628
> conflictList = map countConflictsState (assocs action)
630
> countConflictsState (state, actions)
631
> = foldr countMultiples (0,0) (elems actions)
633
> countMultiples (LR'Multiple (_:_) (LR'Shift{})) (sr,rr)
635
> countMultiples (LR'Multiple (_:_) (LR'Reduce{})) (sr,rr)
637
> countMultiples (LR'Multiple as a) (sr,rr)
638
> = error "bad conflict representation"
639
> countMultiples _ c = c
641
-----------------------------------------------------------------------------
643
> findRule :: Grammar -> Int -> Int -> Maybe Name
644
> findRule g rule dot =
645
> case lookupProdNo g rule of
646
> (_,lhs,_,_) -> case drop dot lhs of