2
BNF Converter: Alex 2.0 Generator
3
Copyright (C) 2004 Author: Peter Gammie
5
This program is free software; you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation; either version 2 of the License, or
8
(at your option) any later version.
10
This program is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with this program; if not, write to the Free Software
17
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
-------------------------------------------------------------------
23
-- Copyright : (C)opyright 2003, {aarne,markus,peteg} at cs dot chalmers dot se
24
-- License : GPL (see COPYING for details)
26
-- Maintainer : {markus,aarne} at cs dot chalmers dot se
28
-- Portability : Haskell98
30
-- Hacked version of @CFtoAlex@ to cope with Alex2.
32
-------------------------------------------------------------------
33
module CFtoAlex2 (cf2alex2) where
38
-- For RegToAlex, see below.
42
cf2alex2 :: String -> String -> String -> Bool -> Bool -> CF -> String
43
cf2alex2 name errMod shareMod shareStrings byteStrings cf =
44
unlines $ concat $ intersperse [""] [
45
prelude name errMod shareMod shareStrings byteStrings,
48
restOfAlex shareMod shareStrings byteStrings cf
51
prelude :: String -> String -> String -> Bool -> Bool -> [String]
52
prelude name errMod shareMod shareStrings byteStrings = [
54
"-- This Alex file was machine-generated by the BNF converter",
56
"{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}",
57
"module " ++ name ++ " where",
59
-- "import " ++ errMod,
60
if shareStrings then "import " ++ shareMod else "",
61
if byteStrings then "import qualified Data.ByteString.Char8 as BS" else "",
68
"$l = [a-zA-Z\\192 - \\255] # [\\215 \\247] -- isolatin1 letter FIXME",
69
"$c = [A-Z\\192-\\221] # [\\215] -- capital isolatin1 letter FIXME",
70
"$s = [a-z\\222-\\255] # [\\247] -- small isolatin1 letter FIXME",
71
"$d = [0-9] -- digit",
72
"$i = [$l $d _ '] -- identifier character",
73
"$u = [\\0-\\255] -- universal: any character"
76
rMacros :: CF -> [String]
78
let symbs = symbols cf
80
(if null symbs then [] else [
81
"@rsyms = -- symbols and non-identifier-like reserved words",
82
" " ++ unwords (intersperse "|" (map mkEsc symbs))
86
esc s = if null a then rest else show a : rest
87
where (a,r) = span isAlphaNum s
91
where s = if isPrint c then ['\\',c]
92
else '\\':show (ord c)
94
restOfAlex :: String -> Bool -> Bool -> CF -> [String]
95
restOfAlex shareMod shareStrings byteStrings cf = [
97
lexComments (comments cf),
104
ifC "String" ("\\\" ([$u # [\\\" \\\\ \\n]] | (\\\\ (\\\" | \\\\ | \\' | n | t)))* \\\"" ++
105
"{ tok (\\p s -> PT p (TL $ share $ unescapeInitTail s)) }"),
106
ifC "Char" "\\\' ($u # [\\\' \\\\] | \\\\ [\\\\ \\\' n t]) \\' { tok (\\p s -> PT p (TC $ share s)) }",
107
ifC "Integer" "$d+ { tok (\\p s -> PT p (TI $ share s)) }",
108
ifC "Double" "$d+ \\. $d+ (e (\\-)? $d+)? { tok (\\p s -> PT p (TD $ share s)) }",
114
"share :: "++stringType++" -> "++stringType,
115
"share = " ++ if shareStrings then "shareString" else "id",
118
" TS !"++stringType++" !Int -- reserved words and symbols",
119
" | TL !"++stringType++" -- string literals",
120
" | TI !"++stringType++" -- integer literals",
121
" | TV !"++stringType++" -- identifiers",
122
" | TD !"++stringType++" -- double precision float literals",
123
" | TC !"++stringType++" -- character literals",
125
" deriving (Eq,Show,Ord)",
130
" deriving (Eq,Show,Ord)",
132
"tokenPos (PT (Pn _ l _) _ :_) = \"line \" ++ show l",
133
"tokenPos (Err (Pn _ l _) :_) = \"line \" ++ show l",
134
"tokenPos _ = \"end of file\"",
136
"posLineCol (Pn _ l c) = (l,c)",
137
"mkPosToken t@(PT p _) = (posLineCol p, prToken t)",
139
"prToken t = case t of",
140
" PT _ (TS s _) -> s",
148
"data BTree = N | B "++stringType++" Tok BTree BTree deriving (Show)",
150
"eitherResIdent :: ("++stringType++" -> Tok) -> "++stringType++" -> Tok",
151
"eitherResIdent tv s = treeFind resWords",
153
" treeFind N = tv s",
154
" treeFind (B a t left right) | s < a = treeFind left",
155
" | s > a = treeFind right",
158
"resWords = " ++ (show $ sorted2tree $ zip (sort resws) [1..]),
159
" where b s n = let bs = "++stringPack++" s",
160
" in B bs (TS bs n)",
162
"unescapeInitTail :: "++stringType++" -> "++stringType++"",
163
"unescapeInitTail = "++stringPack++" . unesc . tail . "++stringUnpack++" where",
164
" unesc s = case s of",
165
" '\\\\':c:cs | elem c ['\\\"', '\\\\', '\\\''] -> c : unesc cs",
166
" '\\\\':'n':cs -> '\\n' : unesc cs",
167
" '\\\\':'t':cs -> '\\t' : unesc cs",
169
" c:cs -> c : unesc cs",
172
"-------------------------------------------------------------------",
173
"-- Alex wrapper code.",
174
"-- A modified \"posn\" wrapper.",
175
"-------------------------------------------------------------------",
177
"data Posn = Pn !Int !Int !Int",
178
" deriving (Eq, Show,Ord)",
180
"alexStartPos :: Posn",
181
"alexStartPos = Pn 0 1 1",
183
"alexMove :: Posn -> Char -> Posn",
184
"alexMove (Pn a l c) '\\t' = Pn (a+1) l (((c+7) `div` 8)*8+1)",
185
"alexMove (Pn a l c) '\\n' = Pn (a+1) (l+1) 1",
186
"alexMove (Pn a l c) _ = Pn (a+1) l (c+1)",
188
"type AlexInput = (Posn, -- current position,",
189
" Char, -- previous char",
190
" "++stringType++") -- current input string",
192
"tokens :: "++stringType++" -> [Token]",
193
"tokens str = go (alexStartPos, '\\n', str)",
195
" go :: AlexInput -> [Token]",
196
" go inp@(pos, _, str) =",
197
" case alexScan inp 0 of",
199
" AlexError (pos, _, _) -> [Err pos]",
200
" AlexSkip inp' len -> go inp'",
201
" AlexToken inp' len act -> act pos ("++stringTake++" len str) : (go inp')",
203
"alexGetChar :: AlexInput -> Maybe (Char,AlexInput)",
204
"alexGetChar (p, _, s) =",
205
" case "++stringUncons++" s of",
206
" "++stringNilP++" -> Nothing",
207
" "++stringConsP++" ->",
208
" let p' = alexMove p c",
209
" in p' `seq` Just (c, (p', c, s))",
211
"alexInputPrevChar :: AlexInput -> Char",
212
"alexInputPrevChar (p, c, s) = c",
216
(stringType,stringTake,stringUncons,stringPack,stringUnpack,stringNilP,stringConsP)
217
| byteStrings = ("BS.ByteString", "BS.take", "BS.uncons", "BS.pack", "BS.unpack", "Nothing", "Just (c,s)")
218
| otherwise = ("String", "take", "", "id", "id", "[]", "(c:s)" )
220
ifC cat s = if isUsedCat cf cat then s else ""
221
lexComments ([],[]) = []
222
lexComments (xs,s1:ys) = '\"' : s1 ++ "\"" ++ " [.]* ; -- Toss single line comments\n" ++ lexComments (xs, ys)
223
lexComments (([l1,l2],[r1,r2]):xs,[]) = concat $
225
('\"':l1:l2:"\" ([$u # \\"), -- FIXME quotes or escape?
233
lexComments ((_:xs),[]) = lexComments (xs,[])
234
--- lexComments (xs,(_:ys)) = lexComments (xs,ys)
236
-- tokens consisting of special symbols
238
pTSpec _ = "@rsyms { tok (\\p s -> PT p (eitherResIdent (TV . share) s)) }"
240
userDefTokenTypes = unlines $
242
" { tok (\\p s -> PT p (eitherResIdent (T_" ++ name ++ " . share) s)) }"
243
| (name,exp) <- tokenPragmas cf]
244
userDefTokenConstrs = unlines $
245
[" | T_" ++ name ++ " !"++stringType | (name,_) <- tokenPragmas cf]
246
userDefTokenPrint = unlines $
247
[" PT _ (T_" ++ name ++ " s) -> s" | (name,_) <- tokenPragmas cf]
250
"$l $i* { tok (\\p s -> PT p (eitherResIdent (TV . share) s)) }"
251
--ifC "Ident" "<ident> ::= ^l ^i* { ident p = PT p . eitherResIdent TV }"
253
resws = reservedWords cf ++ symbols cf
256
data BTree = N | B String Int BTree BTree
258
instance Show BTree where
259
showsPrec _ N = showString "N"
260
showsPrec n (B s k l r) = wrap (showString "b " . shows s . showChar ' '. shows k . showChar ' '
261
. showsPrec 1 l . showChar ' '
263
where wrap f = if n > 0 then showChar '(' . f . showChar ')' else f
265
sorted2tree :: [(String,Int)] -> BTree
267
sorted2tree xs = B x n (sorted2tree t1) (sorted2tree t2) where
268
(t1,((x,n):t2)) = splitAt (length xs `div` 2) xs
271
-------------------------------------------------------------------
272
-- Inlined version of @RegToAlex@.
273
-- Syntax has changed...
274
-------------------------------------------------------------------
276
-- modified from pretty-printer generated by the BNF converter
278
-- the top-level printing method
279
printRegAlex :: Reg -> String
280
printRegAlex = render . prt 0
282
-- you may want to change render and parenth
284
render :: [String] -> String
286
where rend :: Int -> [String] -> String
287
rend i ss = case ss of
288
"[" :ts -> cons "[" $ rend i ts
289
"(" :ts -> cons "(" $ rend i ts
290
t : "," :ts -> cons t $ space "," $ rend i ts
291
t : ")" :ts -> cons t $ cons ")" $ rend i ts
292
t : "]" :ts -> cons t $ cons "]" $ rend i ts
293
t :ts -> space t $ rend i ts
298
space t s = if null s then t else t ++ " " ++ s
300
parenth :: [String] -> [String]
301
parenth ss = ["("] ++ ss ++ [")"]
303
-- the printer class does the job
305
prt :: Int -> a -> [String]
306
prtList :: [a] -> [String]
307
prtList = concat . map (prt 0)
309
instance Print a => Print [a] where
312
instance Print Char where
313
prt _ c = if isAlphaNum c then [[c]] else ['\\':[c]]
314
prtList s = map (concat . prt 0) s
316
prPrec :: Int -> Int -> [String] -> [String]
317
prPrec i j = if j<i then parenth else id
319
instance Print Ident where
320
prt _ (Ident i) = [i]
322
instance Print Reg where
324
RSeq reg0 reg -> prPrec i 2 (concat [prt 2 reg0 , prt 3 reg])
325
RAlt reg0 reg -> prPrec i 1 (concat [prt 1 reg0 , ["|"] , prt 2 reg])
326
RMinus reg0 reg -> prPrec i 1 (concat [prt 2 reg0 , ["#"] , prt 2 reg])
327
RStar reg -> prPrec i 3 (concat [prt 3 reg , ["*"]])
328
RPlus reg -> prPrec i 3 (concat [prt 3 reg , ["+"]])
329
ROpt reg -> prPrec i 3 (concat [prt 3 reg , ["?"]])
330
REps -> prPrec i 3 (["()"])
331
RChar c -> prPrec i 3 (concat [prt 0 c])
332
RAlts str -> prPrec i 3 (concat [["["],prt 0 str,["]"]])
333
RSeqs str -> prPrec i 2 (concat (map (prt 0) str))
334
RDigit -> prPrec i 3 (concat [["$d"]])
335
RLetter -> prPrec i 3 (concat [["$l"]])
336
RUpper -> prPrec i 3 (concat [["$c"]])
337
RLower -> prPrec i 3 (concat [["$s"]])
338
RAny -> prPrec i 3 (concat [["$u"]])