1
(* $Id: test_lex_utf8.ml 673 2004-06-02 22:07:53Z gerd $
2
* ----------------------------------------------------------------------
9
let n = List.length l in
10
let s = String.create (2*n) in
12
let v = List.nth l k in
13
s.[2*k] <- Char.chr(v land 0xff);
14
s.[2*k+1] <- Char.chr(v lsr 8);
16
Netconversion.recode_string `Enc_utf16_le `Enc_utf8 s
20
let lfactory = Pxp_lexers.get_lexer_factory `Enc_utf8 in
21
let lobj = lfactory # open_string s in
22
let scanner = lobj # scan_name_string in
23
let rec scan_rest () =
26
| tok -> tok :: scan_rest()
32
print_string ("Test " ^ name ^ ": ");
38
print_endline "FAILED (returns false)"
41
print_endline ("FAILED (exception " ^ Printexc.to_string ex ^ ")")
44
(**********************************************************************)
46
let name1 = [ 0x03d0; 0x03d1; 0x0958; 0x0041; 0x0A05 ];;
47
let name2 = [ 0x10d0; 0x11ba; 0x2126; 0xAc00; 0xac01 ];;
48
let name3 = [ 0xd700; 0xd701; 0x3105; 0x1f5d; 0x114c ];;
50
let s_name1 = mk_utf8_string name1 ;;
51
let s_name2 = mk_utf8_string name2 ;;
52
let s_name3 = mk_utf8_string name3 ;;
54
let ideo1 = [ 0x4e00; 0x3007; 0x9fa4 ];;
55
let ideo2 = [ 0x3021; 0x3026; 0x3027 ];;
57
let s_ideo1 = mk_utf8_string ideo1 ;;
58
let s_ideo2 = mk_utf8_string ideo2 ;;
60
let digit1 = [ 0x0030; 0x0031 ];;
61
let digit2 = [ 0x09e6; 0x09ef ];;
62
let digit3 = [ 0x30fc; 0x30fe ];;
64
let s_digit1 = mk_utf8_string digit1 ;;
65
let s_digit2 = mk_utf8_string digit2 ;;
66
let s_digit3 = mk_utf8_string digit3 ;;
68
let other1_1 = [ 0x00f7 ];;
69
let other1_2 = [ 0x0a00 ];;
70
let other1 = other1_1 @ other1_2 ;;
71
let other2_1 = [ 0x1000 ];;
72
let other2_2 = [ 0x2000 ];;
73
let other2 = other2_1 @ other2_2 ;;
75
let s_other1_1 = mk_utf8_string other1_1 ;;
76
let s_other1_2 = mk_utf8_string other1_2 ;;
77
let s_other1 = mk_utf8_string other1 ;;
78
let s_other2_1 = mk_utf8_string other2_1 ;;
79
let s_other2_2 = mk_utf8_string other2_2 ;;
80
let s_other2 = mk_utf8_string other2 ;;
83
scan s_name1 = [ Name s_name1 ]
87
scan s_name2 = [ Name s_name2 ]
91
scan s_name3 = [ Name s_name3 ]
95
scan s_digit1 = [ Nametoken s_digit1 ]
99
scan s_digit2 = [ Nametoken s_digit2 ]
103
scan s_digit3 = [ Nametoken s_digit3 ]
106
let t_name1digit1 () =
107
scan (s_name1 ^ s_digit1) = [ Name (s_name1 ^ s_digit1) ]
110
let t_name2digit2 () =
111
scan (s_name2 ^ s_digit2) = [ Name (s_name2 ^ s_digit2) ]
114
let t_name3digit3 () =
115
scan (s_name3 ^ s_digit3) = [ Name (s_name3 ^ s_digit3) ]
118
let t_name1ideo1 () =
119
scan (s_name1 ^ s_ideo1) = [ Name (s_name1 ^ s_ideo1) ]
122
let t_name2ideo2 () =
123
scan (s_name2 ^ s_ideo2) = [ Name (s_name2 ^ s_ideo2) ]
126
let t_digit1name1 () =
127
scan (s_digit1 ^ s_name1) = [ Nametoken (s_digit1 ^ s_name1) ]
130
let t_digit2name2 () =
131
scan (s_digit2 ^ s_name2) = [ Nametoken (s_digit2 ^ s_name2) ]
134
let t_digit3name3 () =
135
scan (s_digit3 ^ s_name3) = [ Nametoken (s_digit3 ^ s_name3) ]
139
scan s_other1 = [ CharData s_other1_1; CharData s_other1_2 ]
143
scan s_other2 = [ CharData s_other2_1; CharData s_other2_2 ]
146
let t_name1other1name2 () =
147
scan (s_name1 ^ s_other1 ^ s_name2 ) =
148
[ Name s_name1; CharData s_other1_1; CharData s_other1_2; Name s_name2 ]
151
let t_name2other2name3 () =
152
scan (s_name2 ^ s_other2 ^ s_name3 ) =
153
[ Name s_name2; CharData s_other2_1; CharData s_other2_2; Name s_name3 ]
156
let t_digit1other1digit2 () =
157
scan (s_digit1 ^ s_other1 ^ s_digit2 ) =
158
[ Nametoken s_digit1;
159
CharData s_other1_1; CharData s_other1_2;
163
let t_digit2other2digit3 () =
164
scan (s_digit2 ^ s_other2 ^ s_digit3 ) =
165
[ Nametoken s_digit2;
166
CharData s_other2_1; CharData s_other2_2;
171
let chr x = String.make 1 (Char.chr x) in
173
(* The wrong way to encode 0x00: *)
174
let _ = scan (chr 0b11000000 ^ chr 0b10000000) in
177
Netconversion.Malformed_code ->
182
let chr x = String.make 1 (Char.chr x) in
184
(* The wrong way to encode 0x40: *)
185
let _ = scan (chr 0b11000001 ^ chr 0b10000000) in
188
Netconversion.Malformed_code ->
193
let chr x = String.make 1 (Char.chr x) in
195
(* The wrong way to encode 0x00: *)
196
let _ = scan (chr 0b11100000 ^ chr 0b10000000 ^ chr 0b10000000) in
199
Netconversion.Malformed_code ->
204
let chr x = String.make 1 (Char.chr x) in
206
(* The wrong way to encode 0x0700: *)
207
let _ = scan (chr 0b11100000 ^ chr 0b10011100 ^ chr 0b10000000) in
210
Netconversion.Malformed_code ->
215
let chr x = String.make 1 (Char.chr x) in
217
(* The wrong way to encode 0x00: *)
218
let _ = scan (chr 0b11110000 ^ chr 0b10000000 ^ chr 0b10000000 ^ chr 0b10000000) in
221
Netconversion.Malformed_code ->
226
let chr x = String.make 1 (Char.chr x) in
228
(* The wrong way to encode 0x0700: *)
229
let _ = scan (chr 0b11110000 ^ chr 0b10000000 ^ chr 0b10011100 ^ chr 0b10000000) in
232
Netconversion.Malformed_code ->
238
let chr x = String.make 1 (Char.chr x) in
240
(* The wrong way to encode 0x7000: *)
241
let _ = scan (chr 0b11110000 ^ chr 0b10000111 ^ chr 0b10000000 ^ chr 0b10000000) in
244
Netconversion.Malformed_code ->
248
(**********************************************************************)
250
dotest "t_name1" t_name1;;
251
dotest "t_name2" t_name2;;
252
dotest "t_name3" t_name3;;
254
dotest "t_digit1" t_digit1;;
255
dotest "t_digit2" t_digit2;;
256
dotest "t_digit3" t_digit3;;
258
dotest "t_name1digit1" t_name1digit1;;
259
dotest "t_name2digit2" t_name2digit2;;
260
dotest "t_name3digit3" t_name3digit3;;
262
dotest "t_name1ideo1" t_name1ideo1;;
263
dotest "t_name2ideo2" t_name2ideo2;;
265
dotest "t_digit1name1" t_digit1name1;;
266
dotest "t_digit2name2" t_digit2name2;;
267
dotest "t_digit3name3" t_digit3name3;;
269
dotest "t_other1" t_other1;;
270
dotest "t_other2" t_other2;;
272
dotest "t_name1other1name2" t_name1other1name2;;
273
dotest "t_name2other2name3" t_name2other2name3;;
275
dotest "t_digit1other1digit2" t_digit1other1digit2;;
276
dotest "t_digit2other2digit3" t_digit2other2digit3;;
278
dotest "t_illegal1" t_illegal1;;
279
dotest "t_illegal2" t_illegal2;;
280
dotest "t_illegal3" t_illegal3;;
281
dotest "t_illegal4" t_illegal4;;
282
dotest "t_illegal5" t_illegal5;;
283
dotest "t_illegal6" t_illegal6;;
284
dotest "t_illegal7" t_illegal7;;