1
(* $Id: parse_uniset.ml,v 1.10 2003/12/19 17:24:34 yori Exp $ *)
2
(* Copyright 2002 Yamagata Yoriyuki. distributed with LGPL *)
4
(* Mutable accumulator for the set being built.  It starts as [USet.empty]
   and is overwritten with an extended set each time an input entry is
   added. *)
let tbl_rw = ref USet.empty
7
(* Pattern for a range entry: two runs of hexadecimal digits joined by "..".
   Group 1 captures the first number and group 2 the second. *)
let range_pat = Str.regexp "\\([0-9A-Fa-f]+\\)\\.\\.\\([0-9A-Fa-f]+\\)"
8
(* Pattern for a single entry: a run of ASCII letters and digits.
   NOTE(review): the class is A-Za-z rather than A-Fa-f, although the matched
   text is later prefixed with "0x" and passed to [int_of_string]; confirm
   whether accepting non-hexadecimal letters here is intended. *)
let num_pat = Str.regexp "[0-9A-Za-z]+"
11
let s = read_line () in
12
if Str.string_match line_pat s 0 then Str.matched_group 1 s else s *)
14
(* Mutable integer cell, initially 0.
   NOTE(review): no read or write of this cell appears in the surrounding
   code; confirm it is still needed. *)
let prev_entry : int ref = ref 0
18
(* Fetch the next line from standard input. *)
let s = read_line () in
19
(* Range entry.  [Str.string_match _ s 0] anchors the match at the start of
   the line but does not require it to reach the end of the line. *)
if Str.string_match range_pat s 0 then
20
(* Each captured group is prefixed with "0x" so that [int_of_string] reads it
   as a hexadecimal number. *)
let u1 = UChar.chr_of_uint (int_of_string ("0x"^(Str.matched_group 1 s))) in
21
let u2 = UChar.chr_of_uint (int_of_string ("0x"^(Str.matched_group 2 s))) in
22
(* Store the set extended with the range given by the two characters. *)
tbl_rw := USet.add_range u1 u2 !tbl_rw
23
(* Single entry: the whole matched text is parsed as one hexadecimal number.
   NOTE(review): [num_pat] also admits letters outside A-F, for which
   [int_of_string] raises [Failure]; the handler below only covers
   [End_of_file]. *)
else if Str.string_match num_pat s 0 then
24
let u = UChar.chr_of_uint (int_of_string ("0x"^(Str.matched_string s))) in
25
tbl_rw := USet.add u !tbl_rw
27
(* [read_line] raises [End_of_file] when the input is exhausted; catching it
   here ends the loop and yields unit. *)
done with End_of_file -> ()
33
let name = ref None in
34
Arg.parse [] (fun s ->
36
None -> dir := Some s;
38
if !name = None then name := Some s else
39
raise (Arg.Bad "Too many arguments"))
40
"Parse unicode lists";
41
match !dir, !name with
42
(Some dir, Some name) -> dir, name
43
| _ -> raise (Arg.Bad "Some arguments are missing.") in
44
let c = open_out_bin (Filename.concat dir (name ^ "_set.mar")) in
45
output_value c !tbl_rw;
47
let c = open_out_bin (Filename.concat dir (name ^ ".mar")) in
48
let tbl = UCharTbl.Bool.of_set !tbl_rw in