2
BNF Converter: Happy Generator
3
Copyright (C) 2004 Author: Markus Forberg, Aarne Ranta
5
This program is free software; you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation; either version 2 of the License, or
8
(at your option) any later version.
10
This program is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with this program; if not, write to the Free Software
17
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
cf2HappyS -- cf2HappyS :: CF -> CFCat -> String
28
import Data.List (intersperse, sort)
30
import Options (HappyMode(..))
33
type Rules = [(NonTerminal,[(Pattern,Action)])]
34
type NonTerminal = String
41
moduleName = "HappyParser"
48
cf2HappyS :: String -> String -> String -> String -> HappyMode -> Bool -> CF -> String
49
---- cf2HappyS :: String -> CF -> String
52
-- The main function, that given a CF and a CFCat to parse according to,
53
-- generates a happy module.
54
cf2Happy name absName lexName errName mode byteStrings cf
56
[header name absName lexName errName mode byteStrings,
57
declarations mode (allEntryPoints cf),
61
specialRules byteStrings cf,
62
prRules (rulesForHappy cf),
63
finalize byteStrings cf]
65
-- construct the header.
66
header :: String -> String -> String -> String -> HappyMode -> Bool -> String
67
header modName absName lexName errName mode byteStrings = unlines
68
["-- This Happy file was machine-generated by the BNF converter",
70
"{-# OPTIONS_GHC -fno-warn-incomplete-patterns -fno-warn-overlapping-patterns #-}",
72
Standard -> "module " ++ modName ++ " where"
73
GLR -> "-- module name filled in by Happy",
77
if byteStrings then "import qualified Data.ByteString.Char8 as BS" else "",
82
cf2Happy :: String -> CF -> String
86
declarations (allEntryPoints cf),
91
prRules (rulesForHappy cf),
94
-- construct the header.
95
header :: String -> String
97
["-- This Happy file was machine-generated by the BNF converter",
99
"module Par" ++ name ++ " where",
107
-- The declarations of a happy file.
108
declarations :: HappyMode -> [NonTerminal] -> String
109
declarations mode ns = unlines
112
Standard -> "-- no lexer declaration"
113
GLR -> "%lexer { myLexer } { Err _ }",
114
"%monad { Err } { thenM } { returnM }",
115
"%tokentype { " ++ tokenName ++ " }"]
116
where generateP [] = []
117
generateP (n:ns) = concat ["%name p",n'," ",n',"\n",generateP ns]
118
where n' = identCat n
120
-- The useless delimiter symbol.
124
-- Generate the list of tokens and their identifiers.
125
tokens :: [(String,Int)] -> String
126
tokens toks = "%token \n" ++ prTokens toks
127
where prTokens [] = []
128
prTokens ((t,k):tk) = " " ++ (convert t) ++
129
" { " ++ oneTok t k ++ " }\n" ++
131
oneTok t k = "PT _ (TS _ " ++ show k ++ ")"
133
-- Happy doesn't allow characters such as ��� to occur in the happy file. This
134
-- is however not a restriction, just a naming paradigm in the happy source file.
135
convert :: String -> String
136
convert "\\" = concat ['\'':"\\\\","\'"]
137
convert xs = concat ['\'':(escape xs),"\'"]
139
escape ('\'':xs) = '\\':'\'':escape xs
140
escape (x:xs) = x:escape xs
142
rulesForHappy :: CF -> Rules
143
rulesForHappy cf = map mkOne $ ruleGroups cf where
144
mkOne (cat,rules) = constructRule cf rules cat
146
-- For every non-terminal, we construct a set of rules. A rule is a sequence of
147
-- terminals and non-terminals, and an action to be performed
148
-- As an optimization, a pair of list rules [C] ::= "" | C k [C]
149
-- is left-recursivized into [C] ::= "" | [C] C k.
150
-- This could be generalized to cover other forms of list rules.
151
constructRule :: CF -> [Rule] -> NonTerminal -> (NonTerminal,[(Pattern,Action)])
152
constructRule cf rules nt = (nt,[(p,generateAction nt (revF b r) m) |
154
let (b,r) = if isConsFun (funRule r0) && elem (valCat r0) revs
155
then (True,revSepListRule r0)
157
let (p,m) = generatePatterns cf r])
159
revF b r = if b then ("flip " ++ funRule r) else (underscore $ funRule r)
160
revs = reversibleCats cf
161
underscore f | isDefinedRule f = f ++ "_"
164
-- Generates a string containing the semantic action.
165
-- An action can for example be: Sum $1 $2, that is, construct an AST
166
-- with the constructor Sum applied to the two metavariables $1 and $2.
167
generateAction :: NonTerminal -> Fun -> [MetaVar] -> Action
168
generateAction nt f ms = unwords $ (if isCoercion f then [] else [f]) ++ ms
170
-- Generate patterns and a set of metavariables indicating
171
-- where in the pattern the non-terminal
173
generatePatterns :: CF -> Rule -> (Pattern,[MetaVar])
174
generatePatterns cf r = case rhsRule r of
175
[] -> ("{- empty -}",[])
176
its -> (unwords (map mkIt its), metas its)
181
metas its = [revIf c ('$': show i) | (i,Left c) <- zip [1 ::Int ..] its]
182
revIf c m = if (not (isConsFun (funRule r)) && elem c revs)
183
then ("(reverse " ++ m ++ ")")
184
else m -- no reversal in the left-recursive Cons rule itself
185
revs = reversibleCats cf
187
-- We have now constructed the patterns and actions,
188
-- so the only thing left is to merge them into one string.
190
prRules :: Rules -> String
191
prRules = unlines . map prOne
193
prOne (nt,[]) = [] -- nt has only internal use
194
prOne (nt,((p,a):ls)) =
195
unwords [nt', "::", "{", normCat nt, "}\n" ++
196
nt', ":" , p, "{", a, "}", "\n" ++ pr ls] ++ "\n"
201
unlines [(concat $ intersperse " " [" |", p, "{", a , "}"])] ++ pr ls
203
-- Finally, some haskell code.
205
finalize :: Bool -> CF -> String
206
finalize byteStrings cf = unlines $
209
"\nreturnM :: a -> Err a",
211
"\nthenM :: Err a -> (a -> Err b) -> Err b",
213
"\nhappyError :: [" ++ tokenName ++ "] -> Err a",
215
" Bad $ \"syntax error at \" ++ tokenPos ts ++ ",
218
" [Err _] -> \" due to lexer error\"",
219
" _ -> \" before \" ++ unwords (map ("++stringUnpack++" . prToken) (take 4 ts))",
222
] ++ definedRules cf ++ [ "}" ]
225
| byteStrings = "BS.unpack"
229
definedRules cf = [ mkDef f xs e | FunDef f xs e <- pragmasOfCF cf ]
231
mkDef f xs e = unwords $ (f ++ "_") : xs' ++ ["=", show e']
235
underscore (App x es)
236
| isLower $ head x = App (x ++ "_") $ map underscore es
237
| otherwise = App x $ map underscore es
240
-- aarne's modifs 8/1/2002:
241
-- Markus's modifs 11/02/2002
244
specialToks :: CF -> String
245
specialToks cf = unlines $
246
(map aux (literals cf))
250
"Ident" -> "L_ident { PT _ (TV $$) }"
251
"String" -> "L_quoted { PT _ (TL $$) }"
252
"Integer" -> "L_integ { PT _ (TI $$) }"
253
"Double" -> "L_doubl { PT _ (TD $$) }"
254
"Char" -> "L_charac { PT _ (TC $$) }"
255
own -> "L_" ++ own ++ " { PT _ (T_" ++ own ++ " " ++ posn ++ ") }"
257
posn = if isPositionCat cf cat then "_" else "$$"
259
specialRules :: Bool -> CF -> String
260
specialRules byteStrings cf = unlines $
261
map aux (literals cf)
265
"Ident" -> "Ident :: { Ident } : L_ident { Ident $1 }"
266
"String" -> "String :: { String } : L_quoted { "++stringUnpack++" $1 }"
267
"Integer" -> "Integer :: { Integer } : L_integ { (read ("++stringUnpack++" $1)) :: Integer }"
268
"Double" -> "Double :: { Double } : L_doubl { (read ("++stringUnpack++" $1)) :: Double }"
269
"Char" -> "Char :: { Char } : L_charac { (read ("++stringUnpack++" $1)) :: Char }"
270
own -> own ++ " :: { " ++ own ++ "} : L_" ++ own ++ " { " ++ own ++ " ("++ posn ++ "$1)}"
271
-- PCC: take "own" as type name? (manual says newtype)
273
posn = if isPositionCat cf cat then "mkPosToken " else ""
276
| byteStrings = "BS.unpack"