3
-- (c) 2004 University of Durham, Julia Fischer
4
-- Portions of the grammar are derived from work by Leung/Mellish/Robertson
17
%lexer { lexer } { TokenEOF }
22
: Intergenic_noise Match Intergenic_noise {}
23
-- replace NSkip by Intergenic_noise?
27
| Intergenic_noise N {} -- Left-assoc, less stack?
30
: Promoter Translation {}
33
: Promoter_consensus {1}
34
| Promoter_hcv_large {2}
36
| Promoter_hcv_small {4}
42
-- regions [data from Leung (hvc_small.gr)]
44
: N V N7_skip K B K N20_skip R N12_skip {} --mod 3 = 0
45
| K N B N N D N18_skip H N9_skip V N {} --mod 3 = 0
46
| t N20_skip N6_skip t N4_skip t N6_skip {} --mod 3 = 0
53
-- regions [data from Leung (consensus.gr)]
56
: Minus_35 N15_skip Minus_10 {}
57
| Minus_35 N15_skip N1_skip Minus_10 N5_skip {}
58
| Minus_35 N15_skip N2_skip Minus_10 N5_skip {}
59
| Minus_35 N15_skip N3_skip Minus_10 N5_skip {}
60
| Minus_35 N15_skip N4_skip Minus_10 N5_skip {}
71
-- regions [data from Leung (hvc_large.gr)]
74
: H N11_skip D Y B N3_skip H N12_skip B N5_skip Y N2_skip W N4_skip {}
75
| N D N3_skip V N1_skip B N12_skip H N2_skip B D N2_skip H N2_skip H B N4_skip W N6_skip H H {}
76
| N H N B N D N6_skip H N4_skip K B N6_skip D B N3_skip B N4_skip V N4_skip H N2_skip D N7_skip {}
77
| N N D N12_skip B D N2_skip V N2_skip H D N2_skip D H B N7_skip B D N5_skip H H N6_skip {}
78
| D N D N12_skip B N5_skip H N13_skip B N H H W N6_skip H Y {}
79
| N N D N B N D N H N3_skip D N4_skip V N2_skip H N D H N6_skip H N3_skip D N6_skip H N2_skip B N3_skip {}
80
| D N8_skip H N1_skip H N1_skip D N4_skip H N3_skip V H N11_skip H N2_skip H N5_skip D N1_skip V N1_skip H {}
81
| H N3_skip B N9_skip H N12_skip H D N4_skip W B N2_skip D D H N1_skip D N5_skip D H {}
82
| V N7_skip V N2_skip D N2_skip D N6_skip B H N11_skip D D N1_skip H N1_skip H H N1_skip B N2_skip {}
83
| D N8_skip B D D N2_skip B N6_skip H N4_skip D N5_skip D N1_skip H D N2_skip D N3_skip D D N6_skip {}
84
| B N13_skip H N1_skip D H V N14_skip B N1_skip V N2_skip D N1_skip D V D N1_skip D N3_skip H {}
85
| H V N4_skip B N1_skip D N6_skip D N4_skip D N4_skip H H N3_skip B N6_skip B N1_skip D N3_skip D N1_skip D N4_skip {}
86
| W N3_skip V N9_skip D N11_skip B N1_skip D H N5_skip D H N1_skip D N1_skip H D N6_skip {}
87
| K N2_skip D N3_skip H N1_skip H N6_skip H N2_skip B N5_skip D D N7_skip V N2_skip D N1_skip H H N7_skip {}
88
| D N11_skip H D D N2_skip D N6_skip D N3_skip H N6_skip V N1_skip D D N2_skip H B N1_skip B N1_skip {}
89
| H N3_skip B N1_skip H N6_skip V N1_skip B N2_skip V N2_skip D N7_skip B N8_skip H N3_skip H D N1_skip H N1_skip H N1_skip {}
90
| B N4_skip B N12_skip H N4_skip V N2_skip H D N2_skip V H N1_skip H N2_skip H N3_skip B N1_skip K N4_skip {}
91
| W D N7_skip B N1_skip D N2_skip D N2_skip W N1_skip D H N2_skip D N12_skip D N5_skip H {}
92
| a N2_skip t N4_skip g N18_skip {}
97
-- regions [data from Leung (cart.gr)]
100
: N N t a N N N N N N N N N N N {}
101
| N N V a N N N t N N N N N N N {}
102
| t N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N t B N N N t N N N N N N N t N N N N N N N {}
105
--------------------------------------------------------------------------------------------------------------
106
--------------------------------------------------------------------------------------------------------------
109
: Start Mincodon Stop {}
110
| Start Mincodon Codon Stop {}
111
| Start Mincodon Codon Codon Stop {}
112
| Start Mincodon Codon Codon Codon Stop {}
113
| Start Mincodon Codon Codon Codon Codon Stop {}
114
| Start Mincodon Codon Codon Codon Codon Codon Stop {}
115
| Start Mincodon Codon Codon Codon Codon Codon Codon Stop {}
116
| Start Mincodon Codon Codon Codon Codon Codon Codon Codon Stop {}
117
| Start Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
118
| Start Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
119
| Start Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
120
| Start Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
121
| Start Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
122
| Start Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
123
| Start Mincodon Mincodon Stop {}
124
| Start Mincodon Mincodon Codon Stop {}
125
| Start Mincodon Mincodon Codon Codon Stop {}
126
| Start Mincodon Mincodon Codon Codon Codon Stop {}
127
| Start Mincodon Mincodon Codon Codon Codon Codon Stop {}
128
| Start Mincodon Mincodon Codon Codon Codon Codon Codon Stop {}
129
| Start Mincodon Mincodon Codon Codon Codon Codon Codon Codon Stop {}
130
| Start Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Stop {}
131
| Start Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
132
| Start Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
133
| Start Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
134
| Start Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
135
| Start Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
136
| Start Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
137
| Start Mincodon Mincodon Mincodon Stop {}
138
| Start Mincodon Mincodon Mincodon Codon Stop {}
139
| Start Mincodon Mincodon Mincodon Codon Codon Stop {}
140
| Start Mincodon Mincodon Mincodon Codon Codon Codon Stop {}
141
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Stop {}
142
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Stop {}
143
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Stop {}
144
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Stop {}
145
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
146
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
147
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
148
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
149
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
150
| Start Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
151
| Start Mincodon Mincodon Mincodon Mincodon Stop {}
152
| Start Mincodon Mincodon Mincodon Mincodon Codon Stop {}
153
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Stop {}
154
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Stop {}
155
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Stop {}
156
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Stop {}
157
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Stop {}
158
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Stop {}
159
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
160
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
161
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
162
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
163
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
164
| Start Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
165
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Stop {}
166
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Stop {}
167
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Stop {}
168
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Stop {}
169
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Stop {}
170
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Stop {}
171
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Stop {}
172
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Stop {}
173
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
174
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
175
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
176
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
177
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
178
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Stop {}
179
| Start Mincodon Mincodon Mincodon Mincodon Mincodon Mincodon Stop {} -- 252 bases (6 x Mincodon)
182
-- Mincodon: a fixed-length run of 14 consecutive Codon symbols with an empty
-- semantic action. Used as the coarse building block for long translated
-- regions; 42 bases, assuming each Codon spans three bases.
Mincodon : Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon Codon {} -- 42 bases
190
: N {} -- match starts one place on
192
: N N {} -- match starts two places on
194
: N N N {} -- missing an entire codon
196
: N N N N {} -- missing 4 bases
198
: N N N N N {} -- missing 5 bases
200
: N N N N N N {} -- missing 6 bases
202
: N N N N N N N {} -- missing 7 bases
204
: N N N N N N N N {} -- missing 8 bases
206
: N N N N N N N N N {} -- missing 9 bases
208
: N N N N N N N N N N {} -- missing 10 bases
210
: N10_skip N1_skip {} -- missing 11 bases
212
: N10_skip N2_skip {} -- missing 12 bases
214
: N10_skip N3_skip {} -- missing 13 bases
216
: N10_skip N4_skip {} -- missing 14 bases
218
: N10_skip N5_skip {} -- missing 15 bases
220
: N10_skip N6_skip {} -- missing 16 bases
222
: N10_skip N7_skip {} -- missing 17 bases
224
: N10_skip N8_skip {} -- missing 18 bases
226
: N10_skip N9_skip {} -- missing 19 bases
228
: N10_skip N10_skip {} -- missing 20 bases
230
: N10_skip N10_skip N10_skip {} -- missing 30 bases
232
: N10_skip N10_skip N10_skip N10_skip {} -- missing 40 bases
234
: N10_skip N10_skip N10_skip N10_skip N10_skip {} -- missing 50 bases
236
: N10_skip N50_skip {} -- missing 60 bases
238
: N10_skip N10_skip N50_skip {} -- missing 70 bases
240
: N10_skip N10_skip N10_skip N50_skip {} -- missing 80 bases
242
: N10_skip N10_skip N10_skip N10_skip N50_skip{} -- missing 90 bases
244
: N50_skip N50_skip {} -- missing 100 bases
248
-- Definitions of base categories according to the
249
-- International Union of Biochemistry (IUB)
250
-- Standard Nucleotide Codes. [Leung_data]
266
S -- strong bonding bases
270
W -- weak bonding bases
313
-- Start matches the three terminals a, t, g in that order; the semantic
-- action is empty, so the rule only recognises the sequence.
Start : a t g {} -- start codon
320
Codon -- any other codon
387
--%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
397
deriving (Show,Eq, Ord)
400
lexer :: String -> [Token]
402
lexer (' ':cs) = lexer cs
403
lexer ('\n':cs) = lexer cs
404
lexer ('a':cs) = Base_A : lexer cs
405
lexer ('c':cs) = Base_C : lexer cs
406
lexer ('g':cs) = Base_G : lexer cs
407
lexer ('t':cs) = Base_T : lexer cs