1
(* $Id: uNF.ml,v 1.4 2004/11/03 11:21:51 yori Exp $ *)
2
(* Copyright 2002 Yamagata Yoriyuki. distributed with LGPL *)
6
let null = UChar.chr_of_uint 0
8
let decomposition_tbl = load_decomposition_tbl ()
9
let decomposition u = UCharTbl.get decomposition_tbl u
11
let composition_exclusion_tbl = load_composition_exclusion_tbl ()
12
let composition_exclusion u = UCharTbl.Bool.get composition_exclusion_tbl u
14
let composition_tbl = load_composition_tbl ()
15
let composition u = UCharTbl.get composition_tbl u
17
let rec add_list x = function
23
let shiftright x i j =
24
for k = j downto i do XString.set x (k + 1) (XString.get x k) done
27
let u = XString.get x j in
28
shiftright x i (j - 1);
31
let blit x i x' i' len =
32
for k = 0 to len - 1 do
33
XString.set x' (i' + k) (XString.get x (i + k));
37
match decomposition u with
38
`HangulSyllable -> Hangul.decompose u
39
| `Composite (`Canon, d) -> d
43
match decomposition u with
46
| `Composite (_, d) ->
47
List.fold_right (fun u a -> (nfkd u) @ a) d []
50
let rec canon_decompose_uchar x u =
51
match decomposition u with
53
Hangul.add_decomposition x u
54
| `Composite (`Canon, d) ->
56
| _ -> XString.add_char x u
58
class canon_decompose (c_out : UChar.t OOChannel.obj_output_channel) =
60
method put u = List.iter c_out#put (nfd u)
61
method close_out = c_out#close_out
62
method flush () = c_out#flush ()
65
let rec kompat_decompose_uchar x u =
66
match decomposition u with
68
List.iter (kompat_decompose_uchar x) d
69
| _ -> Hangul.add_decomposition x u
71
class kompat_decompose c_out : [UChar.t] OOChannel.obj_output_channel =
73
method put u = List.iter c_out#put (nfkd u)
74
method close_out = c_out#close_out
75
method flush () = c_out#flush ()
81
for i = 0 to XString.length x - 1 do
82
let u = XString.get x i in
83
let c = combined_class u in
84
if c = 0 then chead := i else begin
88
combined_class (XString.get x !pos) > c
94
class canon_reorder c_out : [UChar.t] OOChannel.obj_output_channel =
97
method private out_buf =
100
(fun (c1, _) (c2, _) -> c1 - c2)
102
List.iter (fun (_, u) -> c_out#put u) sq';
105
let c = combined_class u in
107
(if sq <> [] then self#out_buf;
111
method close_out () =
117
"uNF.canon_reorder#flush: \
118
Cannot flush the entire buffer";
122
let rec look_composition u1 = function
124
| (u, u') :: rest when u = u1 ->
125
if composition_exclusion u' || combined_class u' <> 0 then None
127
| _ :: rest -> look_composition u1 rest
129
let rec canon_compose_loop x i j x' k c' =
130
if j >= XString.length x then begin
131
blit x i x' (k + 1) (XString.length x - i);
132
k + max (XString.length x - i) 0
134
let u = XString.get x j in
135
let c = combined_class u in
136
let b = if j = i || c' <> c then (*not blocked!*)
137
match look_composition u (composition (XString.get x' k)) with
141
shiftright x i (j - 1);
145
if b && c = 0 then begin
146
blit x i x' (k + 1) (j - i + 1);
147
canon_compose_loop x (j + 1) (j + 1) x' (k + 1 + j - i) 0
149
let i' = if b then i else i + 1 in
150
let c' = if b then c else c' in
151
canon_compose_loop x i' (j + 1) x' k c'
153
let canon_compose x' x =
154
if XString.length x = 0 then () else
157
!pos < XString.length x &&
158
combined_class (XString.get x !pos) <> 0
161
if !pos < XString.length x then begin
162
XString.set x' !pos (XString.get x !pos);
163
pos := canon_compose_loop x (!pos + 1) (!pos + 1) x' !pos 0
165
XString.shrink x' (!pos + 1)
167
class canon_compose c_out : [UChar.t] OOChannel.obj_output_channel =
169
val mutable has_strt = false
170
val mutable strt = null
171
val mutable cmp_cnd = []
172
val mutable lst_cc = -1
173
val sq = Queue.create ()
174
method private set_strt u =
175
if has_strt then c_out#put strt;
176
Queue.iter c_out#put sq;
179
cmp_cnd <- composition u;
182
method private output_buffer () =
183
if has_strt then c_out#put strt;
184
Queue.iter c_out#put sq;
186
let c = combined_class u in
188
if c = 0 then self#set_strt u
191
if c = lst_cc then Queue.add u sq else
192
match look_composition u cmp_cnd with
193
Some u' -> strt <- u'
195
if c = 0 then self#set_strt u else begin
199
method close_out () =
200
self#output_buffer ();
203
self#output_buffer ();
208
let c = new canon_reorder c_out in
210
inherit canon_decompose c
214
let c = new canon_compose c_out in
215
let c = new canon_reorder c in
217
inherit canon_decompose c
218
method flush = c_out#flush
222
let c = new canon_reorder c_out in
224
inherit kompat_decompose c
225
method flush = c_out#flush
229
let c = new canon_compose c_out in
230
let c = new canon_reorder c in
232
inherit kompat_decompose c
233
method flush = c_out#flush
239
val nfd : text -> text
240
val nfkd : text -> text
241
val nfc : text -> text
242
val nfkc : text -> text
246
val create : int -> buf
248
val contents : buf -> text
249
val clear : buf -> unit
250
val reset : buf -> unit
251
val add_char : buf -> UChar.t -> unit
252
val add_string : buf -> text -> unit
253
val add_buffer : buf -> buf -> unit
256
val nfc_append : text -> text -> text
258
val put_nfd : XString.t -> text -> unit
259
val put_nfkd : XString.t -> text -> unit
260
val put_nfc : XString.t -> text -> unit
261
val put_nfkc : XString.t -> text -> unit
267
([`Inc of UChar.t list * index * 'a lazy_t ] as 'a)
268
val canon_compare : text -> text -> int
271
module Make (Text : UnicodeString.Type) =
274
type inc = [`Inc of UChar.t list * Text.index * 'a lazy_t] as 'a
277
`Inc ([], i, lazy (inc_end i))
279
let rec inc_canon_decompose t i : inc =
280
if Text.out_of_range t i then inc_end i else
281
let i' = Text.next t i in
282
`Inc (nfd (Text.look t i), i', lazy (inc_canon_decompose t i'))
284
let rec canon_insert_list ((u, c) as x) a =
287
| (u', c') as y :: rest ->
289
y :: canon_insert_list x rest
293
let rec split1_list = function
295
| (x, _) :: rest -> x :: split1_list rest
297
let canon_sort_list sq =
298
let rec loop a = function
300
| (u, 0) :: rest -> (split1_list a) @ (u :: (loop [] rest))
301
| (u, c) as x :: rest ->
302
loop (canon_insert_list x a) rest in
305
let rec read_combined_class = function
308
(u, combined_class u) :: read_combined_class rest
310
let inc_reorder f t i =
311
let `Inc (us, i', f) = f t i in
312
let rec loop (f : inc Lazy.t) a i =
313
let `Inc (us, i', f) = Lazy.force f in
314
let a' = read_combined_class us in
317
`Inc (canon_sort_list a, i, lazy (inc_end i))
319
`Inc (canon_sort_list a, i, lazy (loop f a' i'))
320
| _ -> loop f (a @ a') i' in
321
loop f (read_combined_class us) i'
324
inc_reorder inc_canon_decompose t i
326
let canon_compare t1 t2 =
327
let `Inc(us1, _, f1) = nfd_inc t1 (Text.nth t1 0) in
328
let `Inc(us2, _, f2) = nfd_inc t2 (Text.nth t2 0) in
329
let rec loop us1 us2 f1 f2 =
334
| u1 :: r1, u2 :: r2 ->
335
let sgn = UChar.compare u1 u2 in
336
if sgn <> 0 then sgn else
339
let `Inc (us1, _, f1) = Lazy.force f1 in
345
let `Inc (us2, _, f2) = Lazy.force f2 in
349
loop us1 us2 f1 f2 in
353
type index = Text.index
355
let canon_decompose x t =
356
Text.iter (canon_decompose_uchar x) t
358
let kompat_decompose x t =
359
Text.iter (kompat_decompose_uchar x) t
361
let text_of_xstring x = Text.init (XString.length x) (XString.get x)
364
let x = XString.make 0 (UChar.chr_of_uint 0) in
370
let x = XString.make 0 (UChar.chr_of_uint 0) in
371
kompat_decompose x t;
376
let x = XString.make 0 (UChar.chr_of_uint 0) in
384
let x = XString.make 0 (UChar.chr_of_uint 0) in
385
kompat_decompose x t;
398
kompat_decompose x t;
410
kompat_decompose x t;
415
module NFCBuf = struct
417
type buf = {mutable normalized : bool; mutable buf : XString.t}
420
{normalized = true; buf = XString.make ~bufsize 0 null}
423
(if not b.normalized then
424
let buf = XString.make 0 null in
425
XString.iter (canon_decompose_uchar buf) b.buf;
427
canon_compose buf buf;
429
b.normalized <- true);
430
text_of_xstring b.buf
433
b.normalized <- true;
437
b.normalized <- true;
441
b.normalized <- false;
442
XString.add_char b.buf u
445
b.normalized <- false;
446
Text.iter (XString.add_char b.buf) t
448
let add_buffer b1 b2 =
449
b1.normalized <- false;
450
XString.add_xstring b1.buf b2.buf
453
let nfc_append t1 t2 =
454
let b = XString.make 0 null in
455
canon_decompose b t1;
456
canon_decompose b t2;