1
(* $Id: lexpp_file.ml 667 2004-06-02 15:21:19Z gerd $
2
* ----------------------------------------------------------------------
9
(* Regular expression for a section marker line: at the start of the line an
 * opening comment delimiter, optional blanks or tabs, a name in square
 * brackets made of letters, digits, underscore or hyphen, optional blanks or
 * tabs, and a closing comment delimiter.  Group 1 captures the name.
 * NOTE(review): the let line binding this value is not visible in this
 * excerpt; presumably it is the section_re used by read_sections. *)
Netstring_str.regexp "^[(][*][ \t]*\\[\\([A-Za-z0-9_-]+\\)\\][ \t]*[*][)]";;
11
(* read_sections filename opens the file for reading and scans it line by
 * line, splitting it into named sections introduced by marker lines that
 * match section_re.  Completed sections are collected as (name, contents)
 * pairs in the local list sections.
 * NOTE(review): this excerpt has gaps and the end of the function is not
 * visible, so the return value and the closing of the channel cannot be
 * confirmed from here. *)
let read_sections filename =
12
let f = open_in filename in
13
(* Progress message on stdout, flushed immediately. *)
printf "[reading %s]\n" filename; flush stdout;
14
(* Name of the section being accumulated, or None when no section is open. *)
let current_section = ref None in
15
(* Text of the lines gathered for the current section. *)
let current_data = Buffer.create 1000 in
16
(* Finished sections as (name, contents) pairs; new entries go to the front. *)
let sections = ref [] in
18
(* Presumably the body of a local save step; its header line is not visible. *)
match !current_section with
21
(* Store the buffered text under s, then mark that no section is open.
 * s is presumably the open section name bound by a pattern not shown here. *)
sections := (s, Buffer.contents current_data) :: !sections;
22
current_section := None;
26
let line = input_line f in
27
(* Test whether the line is a section marker, matching from position 0. *)
match Netstring_str.string_match section_re line 0 with
29
(* Group 1 of the marker regexp is the section name. *)
let section_name = Netstring_str.matched_group mtch 1 line in
30
(* save old section: *)
32
(* begin new section: *)
33
current_section := Some section_name;
34
Buffer.clear current_data;
36
(* Append the line to the buffer, adding back the newline that input_line
 * does not include.  The enclosing match arm is not visible here. *)
Buffer.add_string current_data line;
37
Buffer.add_char current_data '\n';
48
(* parse_char_classes s builds a lexing buffer over the string s and runs the
 * Uni_parser.main entry point on it with Uni_lexer.token as the tokenizer.
 * The result is whatever Uni_parser.main returns. *)
let parse_char_classes s =
  let lexbuf = Lexing.from_string s in
  Uni_parser.main Uni_lexer.token lexbuf
53
(* The following printing functions were originally written by Claudio
57
(* padded_string_of_int i returns the string representing the *)
58
(* integer i (i < 256) using exactly 3 digits (example: 13 -> "013") *)
60
(* NOTE(review): only one branch of this function is visible in this excerpt;
 * the condition guarding it and the remaining cases are missing.  The visible
 * branch prefixes two zeros to the decimal form of i, which yields three
 * characters when i has a single digit. *)
let padded_string_of_int i =
62
"00" ^ string_of_int i
69
(* Two functions useful to print a definition *)
71
(* print_disjunction ?first out writes a list of alternatives to the channel
 * out, separated by vertical bars.  The optional flag first defaults to true
 * and is passed as false on the recursive call, so it is true only while no
 * alternative has been written yet.
 * NOTE(review): the pattern-match headers and a few lines of this mutually
 * recursive pair are missing from this excerpt; the second function,
 * presumably print_re, prints a single regular expression element. *)
let rec print_disjunction ?(first = true) out =
74
(* Nothing was printed at all: emit a character class that matches nothing
 * (a range running from b down to a).  Presumably the empty-list case. *)
if first then output_string out " ['b'-'a' (*empty*) ] "
76
(* Separator before every alternative except the first one. *)
if not first then output_string out " | " ;
78
print_disjunction ~first:false out tl
82
(* A single character: a quoted backslash escape built from the padded
 * decimal form of i. *)
Uni_types.Char i -> output_string out ("'\\" ^ padded_string_of_int i ^ "'")
83
| Uni_types.Interval (l,u) ->
84
(* A character range: both bounds as quoted decimal escapes inside square
 * brackets. *)
output_string out ("['\\" ^ padded_string_of_int l ^ "'-'\\" ^
85
padded_string_of_int u ^ "']")
86
(* A reference to another named definition is written verbatim. *)
| Uni_types.Identifier i -> output_string out i
87
| Uni_types.Concat rell ->
89
(* rel is bound on a line not visible here, presumably to each element of
 * rell.  Parentheses are added only when rel has more than one
 * alternative. *)
if List.length rel > 1 then
90
(output_string out "(" ; print_disjunction out rel ;
91
output_string out ")")
93
print_disjunction out rel
98
(* print_definition prints a definition in the format expected by ocamllex *)
100
(* print_definition out def writes one named definition to the channel out:
 * a header line made of the keyword let, the identifier and an equals sign,
 * then the alternatives of the definition via print_disjunction, and finally
 * two newlines. *)
let print_definition out def =
  let { Uni_types.id = name ; Uni_types.rel = alternatives } = def in
  let header = String.concat "" [ "let "; name; " =\n " ] in
  output_string out header;
  print_disjunction out alternatives;
  output_string out "\n\n"
107
(**********************************************************************)
108
(* print a definition in the format expected by ulex: *)
109
(**********************************************************************)
111
(* print_ulex_disjunction ?first out writes a list of alternatives to the
 * channel out, separated by vertical bars; print_ulex_re writes a single
 * regular expression element.  Characters and interval bounds are written as
 * plain decimal integers.  The optional flag first defaults to true and is
 * passed as false on the recursive call.
 * NOTE(review): the pattern-match headers and a few lines of this mutually
 * recursive pair are missing from this excerpt. *)
let rec print_ulex_disjunction ?(first = true) out =
114
(* Nothing was printed at all: emit a character class that matches nothing
 * (a range running from b down to a).  Presumably the empty-list case. *)
if first then output_string out " ['b'-'a' (*empty*) ] "
116
(* Separator before every alternative except the first one. *)
if not first then output_string out " | " ;
117
print_ulex_re out he ;
118
print_ulex_disjunction ~first:false out tl
120
and print_ulex_re out =
122
(* A single character is written as its decimal integer value. *)
Uni_types.Char i -> output_string out (string_of_int i)
123
| Uni_types.Interval (l,u) ->
124
(* A range: both bounds as decimal integers, joined by a hyphen inside
 * square brackets. *)
output_string out ("[" ^ string_of_int l ^ "-" ^
125
string_of_int u ^ "]")
126
(* A reference to another named definition is written verbatim. *)
| Uni_types.Identifier i -> output_string out i
127
| Uni_types.Concat rell ->
129
(* rel is bound on a line not visible here, presumably to each element of
 * rell.  Parentheses are added only when rel has more than one
 * alternative. *)
if List.length rel > 1 then
130
(output_string out "(" ; print_ulex_disjunction out rel ;
131
output_string out ")")
133
print_ulex_disjunction out rel
138
(* print_ulex_definition out def writes one named definition to the channel
 * out: a header line made of the keywords let regexp, the identifier and an
 * equals sign, then the alternatives of the definition via
 * print_ulex_disjunction, and finally two newlines. *)
let print_ulex_definition out def =
  let { Uni_types.id = name ; Uni_types.rel = alternatives } = def in
  let header = String.concat "" [ "let regexp "; name; " =\n " ] in
  output_string out header;
  print_ulex_disjunction out alternatives;
  output_string out "\n\n"