1
-----------------------------------------------------------------------------
4
(c) 1993-2001 Andy Gill, Simon Marlow
5
-----------------------------------------------------------------------------
7
Here is our mid-section datatype
12
> Production, Grammar(..), mangler,
14
> LRAction(..), ActionTable, Goto(..), GotoTable, Priority(..),
17
> errorName, errorTok, startName, firstStartTok, dummyTok,
25
> import AttrGrammarParser
30
> import Maybe (fromMaybe)
40
> type Production = (Name,[Name],(String,[Int]),Priority)
44
> productions :: [Production],
45
> lookupProdNo :: Int -> Production,
46
> lookupProdsOfName :: Name -> [Int],
47
> token_specs :: [(Name,String)],
48
> terminals :: [Name],
49
> non_terminals :: [Name],
50
> starts :: [(String,Name,Name,Bool)],
51
> types :: Array Int (Maybe String),
52
> token_names :: Array Int String,
53
> first_nonterm :: Name,
56
> priorities :: [(Name,Priority)],
57
> token_type :: String,
58
> imported_identity :: Bool,
59
> monad :: (Bool,String,String,String,String),
60
> expect :: Maybe Int,
61
> attributes :: [(String,String)],
62
> attributetype :: String,
63
> lexer :: Maybe (String,String),
64
> error_handler :: Maybe String
69
> instance Show Grammar where
70
> showsPrec _ (Grammar
74
> , non_terminals = nts
78
> , first_nonterm = fnt
82
> = showString "productions = " . shows p
83
> . showString "\ntoken_specs = " . shows t
84
> . showString "\nterminals = " . shows ts
85
> . showString "\nnonterminals = " . shows nts
86
> . showString "\nstarts = " . shows starts
87
> . showString "\ntypes = " . shows tys
88
> . showString "\ntoken_names = " . shows e
89
> . showString "\nfirst_nonterm = " . shows fnt
90
> . showString "\nfirst_term = " . shows ft
91
> . showString "\neof = " . shows eof
96
> data Assoc = LeftAssoc | RightAssoc | None
104
> data Priority = No | Prio Assoc Int
112
> instance Eq Priority where
114
> Prio _ i == Prio _ j = i == j
117
> mkPrio :: Int -> Directive a -> Priority
118
> mkPrio i (TokenNonassoc _) = Prio None i
119
> mkPrio i (TokenRight _) = Prio RightAssoc i
120
> mkPrio i (TokenLeft _) = Prio LeftAssoc i
121
> mkPrio i _ = error "Panic: impossible case in mkPrio"
123
-----------------------------------------------------------------------------
126
All the tokens in the grammar are mapped onto integers, for speed.
127
The namespace is broken up as follows:
137
These numbers are deeply magical, change at your own risk. Several
138
other places rely on these being arranged as they are, including
139
ProduceCode.lhs and the various HappyTemplates.
141
Unfortunately this means you can't tell whether a given token is a
142
terminal or non-terminal without knowing the boundaries of the
143
namespace, which are kept in the Grammar structure.
145
In hindsight, this was probably a bad idea.
147
> startName = "%start" -- with a suffix, like %start_1, %start_2 etc.
149
> errorName = "error"
150
> dummyName = "%dummy" -- shouldn't occur in the grammar anywhere
152
> firstStartTok, dummyTok, errorTok, epsilonTok :: Name
158
> isEmpty :: Name -> Bool
159
> isEmpty n | n == epsilonTok = True
160
> | otherwise = False
162
-----------------------------------------------------------------------------
165
This bit is a real mess, mainly because of the error message support.
169
> Failed e -> Failed e
170
> Succeeded a -> case k a of
171
> Failed e -> Failed e
172
> Succeeded b -> Succeeded b
176
> Failed e -> case k (error "parE") of
177
> Failed e' -> Failed (e ++ e')
178
> Succeeded _ -> Failed e
179
> Succeeded a -> case k a of
180
> Failed e -> Failed e
181
> Succeeded b -> Succeeded b
183
> parEs [] = Succeeded []
184
> parEs (x:xs) = x `parE` \x' ->
185
> parEs xs `parE` \xs' ->
188
> failMap :: (b -> c) -> MaybeErr a [b] -> MaybeErr a [c]
189
> failMap f e = case e of
190
> Succeeded a -> Succeeded a
191
> Failed s -> Failed (map f s)
194
> mangler :: FilePath -> AbsSyn -> MaybeErr Grammar [String]
195
> mangler file (AbsSyn hd dirs rules tl) =
197
> -- add filename to all error messages
198
> failMap (\s -> file ++ ": " ++ s) $
200
> checkRules ([n | (n,_,_) <- rules]) "" [] `thenE` \nonterm_strs ->
204
> terminal_strs = concat (map getTerm dirs) ++ [eofName]
206
> n_starts = length starts
207
> n_nts = length nonterm_strs
208
> n_ts = length terminal_strs
209
> first_nt = firstStartTok + n_starts
210
> first_t = first_nt + n_nts
211
> last_start = first_nt - 1
212
> last_nt = first_t - 1
213
> last_t = first_t + n_ts - 1
215
> start_names = [ firstStartTok .. last_start ]
216
> nonterm_names = [ first_nt .. last_nt ]
217
> terminal_names = [ first_t .. last_t ]
219
> starts = case getParserNames dirs of
220
> [] -> [TokenName "happyParse" Nothing False]
223
> start_strs = [ startName++'_':p | (TokenName p _ _) <- starts ]
225
Build up a mapping from name values to strings.
227
> name_env = (errorTok, errorName) :
228
> (dummyTok, dummyName) :
229
> zip start_names start_strs ++
230
> zip nonterm_names nonterm_strs ++
231
> zip terminal_names terminal_strs
233
> lookupName :: String -> [Name]
234
> lookupName n = [ t | (t,r) <- name_env, r == n ]
237
> case lookupName str of
239
> [] -> Failed ["unknown identifier `" ++ str ++ "'"]
240
> _ -> Failed ["multiple use of `" ++ str ++ "'"]
244
> -- default start token is the first non-terminal in the grammar
245
> lookupStart (TokenName s Nothing _) = Succeeded first_nt
246
> lookupStart (TokenName s (Just n) _) = mapToName n
249
> parEs (map lookupStart starts) `thenE` \ start_toks ->
252
> parser_names = [ s | TokenName s _ _ <- starts ]
253
> start_partials = [ b | TokenName _ _ b <- starts ]
254
> start_prods = zipWith (\nm tok -> (nm, [tok], ("no code",[]), No))
255
> start_names start_toks
257
Deal with priorities...
259
> priodir = zip [1..] (getPrios dirs)
261
> prios = [ (name,mkPrio i dir)
262
> | (i,dir) <- priodir
263
> , nm <- AbsSyn.getPrioNames dir
264
> , name <- lookupName nm
267
> prioByString = [ (name, mkPrio i dir)
268
> | (i,dir) <- priodir
269
> , name <- AbsSyn.getPrioNames dir
272
Translate the rules from string to name-based.
274
> convNT (nt, prods, ty)
275
> = mapToName nt `thenE` \nt' ->
276
> Succeeded (nt', prods, ty)
278
> attrs = getAttributes dirs
279
> attrType = fromMaybe "HappyAttrs" (getAttributetype dirs)
281
> transRule (nt, prods, ty)
282
> = parEs (map (finishRule nt) prods)
284
> finishRule nt (lhs,code,line,prec)
285
> = failMap (addLine line) $
286
> parEs (map mapToName lhs) `parE` \lhs' ->
287
> checkCode (length lhs) lhs' nonterm_names code attrs `thenE` \code' ->
288
> case mkPrec lhs' prec of
289
> Left s -> Failed ["Undeclared precedence token: " ++ s]
290
> Right p -> Succeeded (nt, lhs', code', p)
292
> mkPrec :: [Name] -> Maybe String -> Either String Priority
295
> Nothing -> case filter (flip elem terminal_names) lhs of
297
> xs -> case lookup (last xs) prios of
298
> Nothing -> Right No
300
> Just s -> case lookup s prioByString of
305
> parEs (map convNT rules) `thenE` \rules1 ->
306
> parEs (map transRule rules1) `thenE` \rules2 ->
309
> tys = accumArray (\a b -> b) Nothing (first_nt, last_nt)
310
> [ (nm, Just ty) | (nm, _, Just ty) <- rules1 ]
312
> env_array :: Array Int String
313
> env_array = array (errorTok, last_t) name_env
316
Get the token specs in terms of Names.
319
> fixTokenSpec (a,b) = mapToName a `thenE` \a -> Succeeded (a,b)
321
> parEs (map fixTokenSpec (getTokenSpec dirs)) `thenE` \tokspec ->
324
> ass = combinePairs [ (a,no)
325
> | ((a,_,_,_),no) <- zip productions [0..] ]
326
> arr = array (firstStartTok, length ass - 1 + firstStartTok) ass
328
> lookup_prods :: Name -> [Int]
329
> lookup_prods x | x >= firstStartTok && x < first_t = arr ! x
330
> lookup_prods _ = error "lookup_prods"
332
> productions = start_prods ++ concat rules2
333
> prod_array = listArray' (0,length productions-1) productions
336
> Succeeded (Grammar {
337
> productions = productions,
338
> lookupProdNo = (prod_array !),
339
> lookupProdsOfName = lookup_prods,
340
> token_specs = tokspec,
341
> terminals = errorTok : terminal_names,
342
> non_terminals = start_names ++ nonterm_names,
343
> -- INCLUDES the %start tokens
344
> starts = zip4 parser_names start_names start_toks
347
> token_names = env_array,
348
> first_nonterm = first_nt,
349
> first_term = first_t,
350
> eof_term = last terminal_names,
351
> priorities = prios,
352
> imported_identity = getImportedIdentity dirs,
353
> monad = getMonad dirs,
354
> lexer = getLexer dirs,
355
> error_handler = getError dirs,
356
> token_type = getTokenType dirs,
357
> expect = getExpect dirs,
358
> attributes = attrs,
359
> attributetype = attrType
362
For combining actions with possible error messages.
364
> addLine :: Int -> String -> String
365
> addLine l s = show l ++ ": " ++ s
367
> getTerm (TokenSpec stuff) = map fst stuff
372
> checkRules (name:rest) above nonterms
373
> | name == above = checkRules rest name nonterms
374
> | name `elem` nonterms
375
> = Failed ["Multiple rules for `" ++ name ++ "'"]
376
> | otherwise = checkRules rest name (name : nonterms)
378
> checkRules [] _ nonterms = Succeeded (reverse nonterms)
381
-----------------------------------------------------------------------------
382
-- If any attribute directives were used, we are in an attribute grammar, so
383
-- go do special processing. If not, pass on to the regular processing routine
385
> checkCode :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> MaybeErr (String,[Int]) [String]
386
> checkCode arity lhs nonterm_names code [] = doCheckCode arity code
387
> checkCode arity lhs nonterm_names code attrs = rewriteAttributeGrammar arity lhs nonterm_names code attrs
389
------------------------------------------------------------------------------
390
-- Special processing for attribute grammars. We re-parse the body of the code
391
-- block and output the nasty-looking record manipulation and let binding goop
394
> rewriteAttributeGrammar :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> MaybeErr (String,[Int]) [String]
395
> rewriteAttributeGrammar arity lhs nonterm_names code attrs =
397
first we need to parse the body of the code block
399
> case runP agParser code 0 of
400
> FailP msg -> Failed [ "error in attribute grammar rules: "++msg ]
403
now we break the rules into three lists, one for synthesized attributes,
404
one for inherited attributes, and one for conditionals
406
> let (selfRules,subRules,conditions) = partitionRules [] [] [] rules
407
> attrNames = map fst attrs
408
> defaultAttr = head attrNames
410
now check that $i references are in range
412
> in parEs (map checkArity (mentionedProductions rules)) `thenE` \prods ->
416
> formatRules arity attrNames defaultAttr
417
> allSubProductions selfRules
418
> subRules conditions `thenE` \rulesStr ->
420
return the munged code body and all sub-productions mentioned
422
> Succeeded (rulesStr,nub (allSubProductions++prods))
425
> where partitionRules a b c [] = (a,b,c)
426
> partitionRules a b c (RightmostAssign attr toks : xs) = partitionRules a (SubAssign (arity,attr) toks : b) c xs
427
> partitionRules a b c (x@(SelfAssign _ _ ) : xs) = partitionRules (x:a) b c xs
428
> partitionRules a b c (x@(SubAssign _ _) : xs) = partitionRules a (x:b) c xs
429
> partitionRules a b c (x@(Conditional _) : xs) = partitionRules a b (x:c) xs
431
> allSubProductions = map (+1) (findIndices (`elem` nonterm_names) lhs)
433
> mentionedProductions rules = [ i | (AgTok_SubRef (i,_)) <- concat (map getTokens rules) ]
435
> getTokens (SelfAssign _ toks) = toks
436
> getTokens (SubAssign _ toks) = toks
437
> getTokens (Conditional toks) = toks
438
> getTokens (RightmostAssign _ toks) = toks
440
> checkArity x = if x <= arity then Succeeded x else Failed [show x++" out of range"]
444
------------------------------------------------------------------------------------
445
-- Actually emit the code for the record bindings and conditionals
448
> formatRules :: Int -> [String] -> String -> [Name]
449
> -> [AgRule] -> [AgRule] -> [AgRule]
450
> -> MaybeErr String [String]
452
> formatRules arity attrNames defaultAttr prods selfRules subRules conditions = Succeeded $
453
> concat [ "\\happyInhAttrs -> let { "
454
> , "happySelfAttrs = happyInhAttrs",formattedSelfRules
455
> , subProductionRules
456
> , "; happyConditions = ", formattedConditions
457
> , " } in (happyConditions,happySelfAttrs)"
460
> where formattedSelfRules = case selfRules of [] -> []; _ -> "{ "++formattedSelfRules'++" }"
461
> formattedSelfRules' = concat $ intersperse ", " $ map formatSelfRule selfRules
462
> formatSelfRule (SelfAssign [] toks) = defaultAttr++" = "++(formatTokens toks)
463
> formatSelfRule (SelfAssign attr toks) = attr++" = "++(formatTokens toks)
465
> subRulesMap :: [(Int,[(String,[AgToken])])]
466
> subRulesMap = map (\l -> foldr (\ (_,x) (i,xs) -> (i,x:xs))
467
> (fst $ head l,[snd $ head l])
469
> groupBy (\x y -> (fst x) == (fst y)) .
470
> sortBy (\x y -> compare (fst x) (fst y)) .
471
> map (\(SubAssign (i,id) toks) -> (i,(id,toks))) $ subRules
473
> subProductionRules = concat $ map formatSubRules prods
476
> let attrs = fromMaybe [] . lookup i $ subRulesMap
477
> attrUpdates' = concat $ intersperse ", " $ map (formatSubRule i) attrs
478
> attrUpdates = case attrUpdates' of [] -> []; x -> "{ "++x++" }"
479
> in concat ["; (happyConditions_",show i,",happySubAttrs_",show i,") = ",mkHappyVar i
480
> ," happyEmptyAttrs"
484
> formattedConditions = concat $ intersperse "++" $ localConditions : (map (\i -> "happyConditions_"++(show i)) prods)
485
> localConditions = "["++(concat $ intersperse ", " $ map formatCondition conditions)++"]"
486
> formatCondition (Conditional toks) = formatTokens toks
488
> formatSubRule i ([],toks) = defaultAttr++" = "++(formatTokens toks)
489
> formatSubRule i (attr,toks) = attr++" = "++(formatTokens toks)
491
> formatTokens tokens = concat (map formatToken tokens)
493
> formatToken AgTok_LBrace = "{ "
494
> formatToken AgTok_RBrace = "} "
495
> formatToken AgTok_Where = "where "
496
> formatToken AgTok_Semicolon = "; "
497
> formatToken AgTok_Eq = "="
498
> formatToken (AgTok_SelfRef []) = "("++defaultAttr++" happySelfAttrs) "
499
> formatToken (AgTok_SelfRef x) = "("++x++" happySelfAttrs) "
500
> formatToken (AgTok_RightmostRef x) = formatToken (AgTok_SubRef (arity,x))
501
> formatToken (AgTok_SubRef (i,[]))
502
> | i `elem` prods = "("++defaultAttr++" happySubAttrs_"++(show i)++") "
503
> | otherwise = mkHappyVar i ++ " "
504
> formatToken (AgTok_SubRef (i,x))
505
> | i `elem` prods = "("++x++" happySubAttrs_"++(show i)++") "
506
> | otherwise = error "lhs "++(show i)++" is not a non-terminal"
507
> formatToken (AgTok_Unknown x) = x++" "
510
-----------------------------------------------------------------------------
511
-- Check for every $i that i is <= the arity of the rule.
513
-- At the same time, we collect a list of the variables actually used in this
514
-- code, which is used by the backend.
516
> doCheckCode :: Int -> String -> MaybeErr (String, [Int]) [String]
517
> doCheckCode arity code = go code "" []
518
> where go code acc used =
520
> [] -> Succeeded (reverse acc, used)
522
> '"' :r -> case reads code :: [(String,String)] of
523
> [] -> go r ('"':acc) used
524
> (s,r):_ -> go r (reverse (show s) ++ acc) used
525
> a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc) used
526
> '\'' :r -> case reads code :: [(Char,String)] of
527
> [] -> go r ('\'':acc) used
528
> (c,r):_ -> go r (reverse (show c) ++ acc) used
529
> '\\':'$':r -> go r ('$':acc) used
531
> '$':'>':r -- the "rightmost token"
532
> | arity == 0 -> Failed [ "$> in empty rule" ]
533
> | otherwise -> go r (reverse (mkHappyVar arity) ++ acc)
536
> '$':r@(i:_) | isDigit i ->
537
> case reads r :: [(Int,String)] of
540
> then Failed [ '$': show j ++ " out of range" ]
541
> `parE` \_ -> go r acc used
542
> else go r (reverse (mkHappyVar j) ++ acc)
545
> c:r -> go r (c:acc) used
547
> mkHappyVar n = "happy_var_" ++ show n
549
-----------------------------------------------------------------------------
550
-- Internal Reduction Datatypes
552
> data LRAction = LR'Shift Int Priority -- state number and priority
553
> | LR'Reduce Int Priority-- rule no and priority
556
> | LR'MustFail -- :-(
557
> | LR'Multiple [LRAction] LRAction -- conflict
568
> type ActionTable = Array Int{-state-} (Array Int{-terminal#-} LRAction)
570
instance Text LRAction where
571
showsPrec _ (LR'Shift i _) = showString ("s" ++ show i)
572
showsPrec _ (LR'Reduce i _)
573
= showString ("r" ++ show i)
574
showsPrec _ (LR'Accept) = showString ("acc")
575
showsPrec _ (LR'Fail) = showString (" ")
576
instance Eq LRAction where { (==) = primGenericEq }
578
> data Goto = Goto Int | NoGoto
589
> type GotoTable = Array Int{-state-} (Array Int{-nonterminal #-} Goto)