1
(* $Id: camlcharmap.ml,v 1.17 2003/12/19 17:24:34 yori Exp $ *)
2
(* Copyright 2002, 2003 Yamagata Yoriyuki *)
8
"camlcharmap[.byte, .opt] -d outputpath file:
9
reads charmap from file and places output to outputpath."
13
Arg.String ((:=) dir),
14
"[directory]\toutputpath")]
16
if !file <> None then failwith "Too many arguments" else
21
| Some name -> open_in name, !dir
23
let escape_char = ref '\\'
24
let comment_char = ref '#'
26
let blank_pat = Str.regexp "[ \t]+"
27
let empty_line = Str.regexp "[ \t]*$"
28
let alias_pat = Str.regexp ".[ \t]*alias[ \t]+\\(.*\\)$"
30
let entry_pat = Str.regexp
31
"\\(.IRREVERSIBLE.\\)?<U\\([0-9A-F]*\\)>[ \t]+\\([^ \t]*\\)"
33
let range_pat = Str.regexp
34
"\\(.IRREVERSIBLE.\\)?<U\\([0-9A-F]*\\)>\\.\\.<U\\([0-9A-F]*\\)>[ \t]+\\([^ \t]*\\)"
39
if String.length s' < String.length s then false else
40
try for i = 0 to (String.length s) - 1 do
41
if s.[i] <> s'.[i] then raise Break
46
let codeset_name, aliases =
47
let codeset_name = ref None in
48
let aliases = ref [] in
50
let s = input_line inchan in
51
if begin_with "<code_set_name>" s then begin
52
if !codeset_name <> None then
53
failwith "Multiple definitions of the codeset name";
54
codeset_name := Some (List.nth (Str.split blank_pat s) 1)
55
end else if begin_with "<comment_char>" s then
56
comment_char := (List.nth (Str.split blank_pat s) 1).[0]
57
else if begin_with "<escape_char>" s then
58
escape_char := (List.nth (Str.split blank_pat s) 1).[0]
59
else if begin_with "<mb_cur_min>" s then ()
60
else if begin_with "<mb_cur_max>" s then ()
61
else if Str.string_match alias_pat s 0 then
62
let a = Str.split blank_pat (Str.matched_group 1 s) in
63
aliases := a @ !aliases
64
else if begin_with "CHARMAP" s then raise Break
65
else if Str.string_match empty_line s 0 || s.[0] = !comment_char then ()
66
else failwith ("Unknown header: " ^ s)
67
done; assert false with Break ->
68
match !codeset_name with
69
None -> failwith "Codeset name is not defined"
70
| Some s -> s, !aliases
72
let zero = Char.code '0'
74
let rec int_of_hex s i j =
75
let n = Char.code s.[i] in
77
if n >= 48 && n <= 57 then n - 48 else
78
if n >= 65 && n <= 70 then n - 55 else
79
if n >= 97 && n <= 102 then n - 87 else
80
invalid_arg "int_of_hex" in
82
n lsl 4 lor (int_of_hex s (i + 1) j)
84
let rec int_of_oct s i j =
85
let n = Char.code s.[i] - zero in
86
if n < 0 || n > 7 then invalid_arg "int_of_oct" else
88
n lsl 3 lor (int_of_hex s (i + 1) j)
90
let rec int_of_dec s i j =
91
let n = Char.code s.[i]- zero in
92
if n < 0 || n > 9 then invalid_arg "int_of_oct" else
94
n * 10 + (int_of_hex s (i + 1) j)
97
let b = Buffer.create 1 in
101
'x' -> int_of_hex s (i + 2) (j - 1)
102
| 'd' -> int_of_dec s (i + 2) (j - 1)
103
| _ -> int_of_oct s (i + 1) (j - 1) in
104
Buffer.add_char b (Char.chr n);
106
if i >= String.length s then () else
108
try String.index_from s (i + 1) esc with
109
Not_found -> String.length s in
111
if s.[0] <> esc then invalid_arg ("get_enc: " ^ s) else
114
(try String.index_from s 1 esc with Not_found ->
118
let int_of_name name = (int_of_string ("0x"^name))
121
let s' = String.copy s in
122
let i = String.length s' - 1 in
123
let c' = Char.chr (1 + Char.code s.[i]) in
126
let irreversible = ".IRREVERSIBLE."
128
let is_irreversible s =
129
irreversible.[0] <- !escape_char;
130
irreversible.[13] <- !escape_char;
131
begin_with irreversible s
134
let enc2u = ref [] in
135
let u2enc = ref IMap.empty in
137
let s = input_line inchan in
138
if Str.string_match entry_pat s 0 then
139
let n = int_of_name (Str.matched_group 2 s) in
140
let enc = get_enc (Str.matched_group 3 s) !escape_char in
141
enc2u := (enc, n) :: !enc2u;
142
if not (is_irreversible s || IMap.mem n !u2enc) then
143
u2enc := IMap.add n enc !u2enc;
144
else if Str.string_match range_pat s 0 then
145
let n1 = int_of_name (Str.matched_group 2 s) in
146
let n2 = int_of_name (Str.matched_group 3 s) in
147
if n1 > n2 then failwith ("Broken range: " ^ s);
148
let enc = get_enc (Str.matched_group 4 s) !escape_char in
149
let irreversible = is_irreversible s in
151
enc2u := (enc, n) :: !enc2u;
152
if not (irreversible || IMap.mem n !u2enc) then
153
u2enc := IMap.add n enc !u2enc;
156
else if Str.string_match empty_line s 0 || s.[0] = !comment_char then ()
157
else if begin_with "END CHARMAP" s then raise Break else
158
failwith ("Broken entry: " ^ s)
159
done; assert false with Break ->
162
module StringTbl = Tbl31.Make (struct
165
let hash = Hashtbl.hash
168
let ucs_to_enc = StringTbl.of_map "" u2enc
170
(* search unused ucs-character *)
173
let s = Tbl31.get ucs_to_enc i in
174
(* Printf.eprintf "%d - %s\n" i (String.escaped s); *)
178
if i > 255 then 0xffff else scan (i + 1)
181
let enc_to_ucs = Charmap.make_enc_to_ucs no_char enc2u
183
let outchan = open_out_bin (Filename.concat dir ((codeset_name)^".mar"))
191
{name = codeset_name;
192
ucs_to_enc = ucs_to_enc;
193
enc_to_ucs = enc_to_ucs});
198
let c = open_out_bin (Filename.concat dir (a ^ ".mar")) in
199
output_value c (Alias codeset_name);