1
(* $Id: camlrb.ml,v 1.16 2004/04/06 22:12:40 yori Exp $ *)
2
(* Copyright 2002, 2003 Yamagata Yoriyuki *)
7
let (enc, readfile, dir) =
8
let enc = ref CharEncoding.utf8 in
9
let readfile = ref stdin in
10
let dir = ref Filename.current_dir_name in
12
["--enc", Arg.String (fun encname -> enc := CharEncoding.of_name encname),
14
"--file", Arg.String (fun filename -> readfile := open_in_bin filename),
16
(fun dirname -> dir := dirname) "ocamllocaledef --enc ENCNAME --file INPUTFILE DIRECTORY:\n\
17
Read the localedef INPUTFILE using the encoding ENCNAME \
18
and put the compiled data into DIRECTORY. \
19
DATADIR specifies where camomile put the datafiles.\
20
If ENCNAME is ommited, UTF-8 is used. \
21
If INPUTFILE is ommited, reading from stdin. \
22
If DIRECTORY is ommited, the current directory is used.";
25
module Utf8Buffer = UTF8.Buf
26
module Utf8NF = UNF.Make (UTF8)
28
let ff = 0x000c (*form feed*)
29
let cr = Char.code '\r'
30
let lf = Char.code '\n'
32
let tab = Char.code '\t'
34
let backslash = Char.code '\\'
35
let sq = Char.code '\\'
36
let dq = Char.code '\"'
38
let backslash = Str.regexp "\\\\\\\\"
40
Str.regexp "\\\\[u]\\([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]\\)"
43
Str.regexp "\\\\[v]\\([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]\\)"
47
Str.global_substitute literal_1
49
let n = int_of_string (Str.replace_matched "0x\\1" s) in
50
UTF8.init 1 (fun _ -> UChar.chr_of_uint n))
54
Str.global_substitute literal_2
56
let n = int_of_string (Str.replace_matched "0x\\1" s) in
57
UTF8.init 1 (fun _ -> UChar.chr_of_uint n))
60
Str.global_replace backslash "\\\\" s
62
let rec stream_to_list_aux a s =
63
(fun (strm__ : _ Stream.t) ->
64
match Stream.peek strm__ with
65
Some e -> Stream.junk strm__; stream_to_list_aux (e :: a) strm__
69
let stream_to_list s = stream_to_list_aux [] s
78
let rec prep (strm__ : _ Stream.t) =
79
match Stream.peek strm__ with
84
try Some (UChar.char_of u) with
87
begin match general_category u with
88
`Cc | `Cf when c <> Some '\n' -> prep rest
90
Stream.lcons (fun _ -> c, ct, u) (Stream.slazy (fun _ -> prep rest))
94
let rec remove_comment (strm__ : _ Stream.t) =
95
match Stream.peek strm__ with
96
Some (Some '/', _, _ as data) ->
99
(fun (strm__ : _ Stream.t) ->
100
match Stream.peek strm__ with
101
Some (Some '/', _, _) -> Stream.junk strm__; comment strm__
102
| Some (Some '*', _, _) -> Stream.junk strm__; comment2 strm__
105
Stream.icons data (Stream.slazy (fun _ -> remove_comment rest)))
107
| Some (Some '\"', _, _ as data) ->
110
Stream.icons data (Stream.slazy (fun _ -> in_quote rest))
114
Stream.icons data (Stream.slazy (fun _ -> remove_comment rest))
116
and comment (strm__ : _ Stream.t) =
117
match Stream.peek strm__ with
118
Some (Some ('\r' | '\n' | '\133'), _, _ | _, (`Zl | `Zp), _) ->
119
Stream.junk strm__; remove_comment strm__
120
| Some data -> Stream.junk strm__; comment strm__
122
and comment2 (strm__ : _ Stream.t) =
123
match Stream.peek strm__ with
124
Some (Some '*', _, _ as data) ->
127
(fun (strm__ : _ Stream.t) ->
128
match Stream.peek strm__ with
129
Some (Some '/', _, _) -> Stream.junk strm__; remove_comment strm__
130
| _ -> comment2 strm__)
132
| Some data -> Stream.junk strm__; comment2 strm__
134
and in_quote (strm__ : _ Stream.t) =
135
match Stream.peek strm__ with
136
Some (Some '\\', _, _ as data1) ->
138
begin match Stream.peek strm__ with
143
(Stream.icons data2 (Stream.slazy (fun _ -> in_quote rest)))
144
| _ -> raise (Stream.Error "")
146
| Some (Some '\"', _, _ as data) ->
149
Stream.icons data (Stream.slazy (fun _ -> remove_comment rest))
153
Stream.icons data (Stream.slazy (fun _ -> in_quote rest))
156
let rec merge_text (strm__ : _ Stream.t) =
157
match Stream.peek strm__ with
158
Some (Text s) -> Stream.junk strm__; do_merge s strm__
162
Stream.icons e (Stream.slazy (fun _ -> merge_text rest))
164
and do_merge s (strm__ : _ Stream.t) =
165
match Stream.peek strm__ with
166
Some (Text s') -> Stream.junk strm__; do_merge (s ^ s') strm__
170
Stream.icons (Text s)
171
(Stream.icons e (Stream.slazy (fun _ -> merge_text rest)))
175
let rec parse (strm__ : _ Stream.t) =
176
match Stream.peek strm__ with
177
Some (Some '{', _, _) ->
180
Stream.icons Brace_l (Stream.slazy (fun _ -> parse rest))
181
| Some (Some '}', _, _) ->
184
Stream.icons Brace_r (Stream.slazy (fun _ -> parse rest))
185
| Some (Some ':', _, _) ->
188
Stream.icons Colon (Stream.slazy (fun _ -> parse rest))
189
| Some (Some ',', _, _) ->
192
Stream.icons Comma (Stream.slazy (fun _ -> parse rest))
193
| Some (Some '\"', _, _) -> Stream.junk strm__; quote strm__
195
(Some ('\r' | '\n' | '\133' | '\t'), _, _ |
196
_, (`Zs | `Zl | `Zp), _) ->
197
Stream.junk strm__; parse strm__
198
| Some e -> Stream.junk strm__; text (Stream.icons e strm__)
201
let buf = Utf8Buffer.create 16 in
202
let rec loop (strm__ : _ Stream.t) =
203
match Stream.peek strm__ with
204
Some (Some '\\', _, u1) ->
206
begin match Stream.peek strm__ with
210
Utf8Buffer.add_char buf u1;
211
Utf8Buffer.add_char buf u2;
213
| _ -> raise (Stream.Error "")
215
| Some (Some '\"', _, _) ->
218
let s = Utf8Buffer.contents buf in
219
let s' = unescape s in
220
Stream.icons (Text s') (Stream.slazy (fun _ -> parse rest))
223
let rest = strm__ in Utf8Buffer.add_char buf u; loop rest
224
| _ -> failwith "A quote is not enclosed."
228
let buf = Utf8Buffer.create 16 in
229
let rec loop (strm__ : _ Stream.t) =
230
match Stream.peek strm__ with
232
(Some ('\r' | '\n' | '\133' | '\t'), _, _ |
233
_, (`Zs | `Zl | `Zp), _) ->
236
let s = Utf8Buffer.contents buf in
237
let s' = unescape s in
238
Stream.icons (Text s') (Stream.slazy (fun _ -> parse rest))
239
| Some (Some ('{' | '}' | ':' | ',' | '\"'), _, _ as e) ->
242
let s = Utf8Buffer.contents buf in
243
let s' = unescape s in
244
Stream.icons (Text s')
245
(Stream.slazy (fun _ -> parse (Stream.icons e rest)))
248
let rest = strm__ in Utf8Buffer.add_char buf u; loop rest
250
let s = Utf8Buffer.contents buf in
251
let s' = unescape s in Stream.ising (Text s')
256
let p1 = remove_comment p in
257
let tokens = parse p1 in
258
let tokens1 = merge_text tokens in let l = stream_to_list tokens1 in l
260
let string_to_binary s =
261
let n = String.length s / 2 in
262
let b = String.create n in
263
for i = 0 to n - 1 do
264
let d = int_of_string ("0x" ^ String.sub s (i * 2) 2) in
271
let load_file filename =
273
if Filename.is_implicit filename then Filename.concat !root filename
276
let c = open_in_bin file in
277
let buf = Buffer.create 16 in
278
try while true do Buffer.add_channel buf c 1 done; assert false with
279
End_of_file -> Buffer.contents buf
282
Table of (string, data) Hashtbl.t
283
| Array_data of data array
284
| String_data of string
287
| Intvect of int array
288
| Tagged of string * data
290
let rec parse_intvect l a =
292
Text num :: Comma :: rest -> parse_intvect rest (int_of_string num :: a)
293
| Text num :: rest ->
294
Intvect (Array.of_list (List.rev (int_of_string num :: a))), rest
295
| _ -> Intvect (Array.of_list (List.rev a)), l
297
let rec parse_table l a =
299
Some d, rest -> parse_table rest (d :: a)
301
let tbl = Hashtbl.create (List.length a) in
304
Tagged (name, data) -> Hashtbl.add tbl name data
305
| _ -> failwith "A broken table entry."
307
List.iter proc a; Table tbl, rest
308
and parse_array l a =
311
let (data, rest) = parse_unknown rest in
312
begin match rest with
313
Brace_r :: Comma :: rest -> parse_array rest (data :: a)
314
| Brace_r :: rest -> parse_array rest (data :: a)
315
| _ -> failwith "A brace is not enclosed."
317
| Text text :: Comma :: rest -> parse_array rest (String_data text :: a)
318
| Text text :: rest ->
319
Array_data (Array.of_list (List.rev (String_data text :: a))), rest
320
| _ -> Array_data (Array.of_list (List.rev a)), l
321
and parse_unknown l =
323
Text text :: Brace_r :: rest -> String_data text, Brace_r :: rest
324
| Text text :: Comma :: rest -> parse_array l []
325
| Text text :: rest -> parse_table l []
326
| _ -> parse_array l []
329
Text tname :: Colon :: Text "table" :: Brace_l :: rest ->
330
let (data, rest) = parse_table rest [] in
331
begin match rest with
332
Brace_r :: rest -> Some (Tagged (tname, data)), rest
333
| _ -> failwith "A brace is not enclosed."
335
| Text tname :: Colon :: Text "array" :: Brace_l :: rest ->
336
let (data, rest) = parse_array rest [] in
337
begin match rest with
338
Brace_r :: rest -> Some (Tagged (tname, data)), rest
339
| _ -> failwith "A brace is not enclosed."
341
| Text tname :: Colon :: Text "string" :: Brace_l :: Text data :: Brace_r ::
343
Some (Tagged (tname, String_data data)), rest
344
| Text tname :: Colon :: Text "bin" :: Brace_l :: Text data :: Brace_r ::
346
let b = string_to_binary data in Some (Tagged (tname, Binary b)), rest
347
| Text tname :: Colon :: Text "import" :: Brace_l :: Text filename ::
349
prerr_endline "Warning : file loading is not supported.";
350
Some (Tagged (tname, Binary "")), rest
351
| Text tname :: Colon :: Text "int" :: Brace_l :: Text num :: Brace_r ::
353
let n = int_of_string num in Some (Tagged (tname, Int n)), rest
354
| Text tname :: Colon :: Text "intvector" :: Brace_l :: rest ->
355
let (data, rest) = parse_intvect rest [] in
356
begin match rest with
357
Brace_r :: rest -> Some (Tagged (tname, data)), rest
358
| _ -> failwith "A brace is not enclosed."
360
| Text name :: Brace_l :: rest ->
361
let (data, rest) = parse_unknown rest in
362
begin match rest with
363
Brace_r :: rest -> Some (Tagged (name, data)), rest
364
| _ -> failwith "A brace is not enclosed."
369
let s = Utf8NF.nfd s in
370
let lexbuf = Lexing.from_string s in
371
let ace_info = ColParser.main ColLexer.token lexbuf in cetbl_of ace_info
379
(match Hashtbl.find tbl "CollationElements" with
381
begin match Hashtbl.find tbl "Sequence" with
382
String_data s -> col_parse s
389
{Unidata.col_info = col_info}
393
let cs = Stream.of_channel readfile in
394
let stream = CharEncoding.ustream_of enc cs in
395
let lexed = lexer stream in
396
let (data, rest) = parse_table lexed [] in
397
if rest <> [] then failwith "Strange trailing data.";
399
let locale_info = localedef entry in
400
let file = Filename.concat dir (key ^ ".mar") in
401
let c = open_out_bin file in output_value c locale_info
404
Table tbl -> Hashtbl.iter proc tbl
405
| _ -> failwith "Broken data."