1
(***********************************************************************)
5
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7
(* Copyright 1996 Institut National de Recherche en Informatique et *)
8
(* en Automatique. All rights reserved. This file is distributed *)
9
(* under the terms of the Q Public License version 1.0. *)
11
(***********************************************************************)
13
(* $Id: lexgen.ml,v 1.5 2000/12/28 13:06:39 weis Exp $ *)
15
(* Compiling a lexer definition *)
19
(* Deep abstract syntax for regular expressions *)
25
| Seq of regexp * regexp
26
| Alt of regexp * regexp
29
(* From shallow to deep syntax *)
33
let print_char_class c =
34
let print_interval low high =
36
if high - 1 > low then begin
41
let rec print_class first next = function
42
[] -> print_interval first next
45
then print_class first (next+1) l
47
print_interval first next;
48
print_class (char.code c) (char.code c + 1) l
52
| c::l -> print_class (char.code c) (char.code c + 1) l; prerr_newline()
55
let rec print_regexp = function
56
Empty -> prerr_string "Empty"
57
| Chars n -> prerr_string "Chars "; prerr_int n
58
| Action n -> prerr_string "Action "; prerr_int n
59
| Seq(r1,r2) -> print_regexp r1; prerr_string "; "; print_regexp r2
60
| Alt(r1,r2) -> prerr_string "("; print_regexp r1; prerr_string " | "; print_regexp r2; prerr_string ")"
61
| Star r -> prerr_string "("; print_regexp r; prerr_string ")*"
65
let chars = ref ([] : char list list)
66
let chars_count = ref 0
67
let actions = ref ([] : (int * location) list)
68
let actions_count = ref 0
70
let rec encode_regexp = function
73
let n = !chars_count in
74
(*** prerr_int n; prerr_char ' '; print_char_class cl; ***)
75
chars := cl :: !chars;
76
chars_count := !chars_count + 1;
79
Seq(encode_regexp r1, encode_regexp r2)
80
| Alternative(r1,r2) ->
81
Alt(encode_regexp r1, encode_regexp r2)
83
Star (encode_regexp r)
88
(fun reg (expr,act) ->
89
let act_num = !actions_count in
90
actions_count := !actions_count + 1;
91
actions := (act_num, act) :: !actions;
92
Alt(reg, Seq(encode_regexp expr, Action act_num)))
96
let encode_lexdef (Lexdef(_, ld)) =
101
let name_regexp_list =
102
List.map (fun (name, casedef) -> (name, encode_casedef casedef)) ld in
103
(* List.iter print_char_class chars; *)
104
let chr = Array.of_list (List.rev !chars)
105
and act = !actions in
108
(chr, name_regexp_list, act)
111
(* To generate directly a NFA from a regular expression.
112
Confer Aho-Sethi-Ullman, dragon book, chap. 3 *)
119
let rec merge_trans l1 l2 =
123
| ((OnChars n1 as t1) :: r1 as s1), ((OnChars n2 as t2) :: r2 as s2) ->
124
if n1 = n2 then t1 :: merge_trans r1 r2 else
125
if n1 < n2 then t1 :: merge_trans r1 s2 else
126
t2 :: merge_trans s1 r2
127
| ((ToAction n1 as t1) :: r1 as s1), ((ToAction n2 as t2) :: r2 as s2) ->
128
if n1 = n2 then t1 :: merge_trans r1 r2 else
129
if n1 < n2 then t1 :: merge_trans r1 s2 else
130
t2 :: merge_trans s1 r2
131
| ((OnChars n1 as t1) :: r1), ((ToAction n2) :: r2 as s2) ->
132
t1 :: merge_trans r1 s2
133
| ((ToAction n1) :: r1 as s1), ((OnChars n2 as t2) :: r2) ->
134
t2 :: merge_trans s1 r2
137
let rec nullable = function
141
| Seq(r1,r2) -> nullable r1 && nullable r2
142
| Alt(r1,r2) -> nullable r1 || nullable r2
146
let rec firstpos = function
148
| Chars pos -> [OnChars pos]
149
| Action act -> [ToAction act]
150
| Seq(r1,r2) -> if nullable r1
151
then merge_trans (firstpos r1) (firstpos r2)
153
| Alt(r1,r2) -> merge_trans (firstpos r1) (firstpos r2)
154
| Star r -> firstpos r
157
let rec lastpos = function
159
| Chars pos -> [OnChars pos]
160
| Action act -> [ToAction act]
161
| Seq(r1,r2) -> if nullable r2
162
then merge_trans (lastpos r1) (lastpos r2)
164
| Alt(r1,r2) -> merge_trans (lastpos r1) (lastpos r2)
165
| Star r -> lastpos r
168
let followpos size name_regexp_list =
169
let v = Array.create size [] in
170
let fill_pos first = function
171
OnChars pos -> v.(pos) <- merge_trans first v.(pos); ()
172
| ToAction _ -> () in
173
let rec fill = function
176
List.iter (fill_pos (firstpos r2)) (lastpos r1)
181
List.iter (fill_pos (firstpos r)) (lastpos r)
183
List.iter (fun (name, regexp) -> fill regexp) name_regexp_list;
187
let no_action = 0x3FFFFFFF
189
let split_trans_set =
191
(fun (act, pos_set as act_pos_set) trans ->
193
OnChars pos -> (act, pos :: pos_set)
194
| ToAction act1 -> if act1 < act then (act1, pos_set)
199
let memory = (Hashtbl.create 131 : (transition list, int) Hashtbl.t)
200
let todo = ref ([] : (transition list * int) list)
205
Hashtbl.find memory st
209
Hashtbl.add memory st nbr;
210
todo := (st, nbr) :: !todo;
213
let rec map_on_states f =
216
| (st,i)::r -> todo := r; let res = f st in (res,i) :: map_on_states f
218
let number_of_states () = !next
220
let goto_state = function
222
| ps -> Goto (get_state ps)
225
let transition_from chars follow pos_set =
226
let tr = Array.create 256 []
227
and shift = Array.create 256 Backtrack in
233
merge_trans tr.(Char.code c) follow.(pos))
237
shift.(i) <- goto_state tr.(i)
242
let translate_state chars follow state =
243
match split_trans_set state with
245
| n, ps -> Shift( (if n = no_action then No_remember else Remember n),
246
transition_from chars follow ps)
249
let make_dfa lexdef =
250
let (chars, name_regexp_list, actions) =
251
encode_lexdef lexdef in
253
List.iter (fun (name, regexp) -> prerr_string name; prerr_string " = "; print_regexp regexp; prerr_newline()) name_regexp_list;
256
followpos (Array.length chars) name_regexp_list in
258
List.map (fun (name, regexp) -> (name, get_state(firstpos regexp)))
261
map_on_states (translate_state chars follow) in
263
Array.create (number_of_states()) (Perform 0) in
264
List.iter (fun (auto, i) -> v.(i) <- auto) states;
265
(initial_states, v, actions)