1
-- source include/have_utf32.inc
4
DROP TABLE IF EXISTS t1;
8
SET character_set_connection=utf32;
9
select hex('a'), hex('a ');
10
-- source include/endspace.inc
13
# Check that incomplete utf32 characters in HEX notation
14
# are left-padded with zeros
16
select hex(_utf32 0x44);
17
select hex(_utf32 0x3344);
18
select hex(_utf32 0x103344);
20
select hex(_utf32 X'44');
21
select hex(_utf32 X'3344');
22
select hex(_utf32 X'103344');
26
# Check that 0x20 is only trimmed when it is
27
# a part of real SPACE character, not just a part
28
# of a multibyte sequence.
29
# Note, CYRILLIC LETTER ER is used as an example, which
30
# is stored as 0x0420 in UCS2, thus contains 0x20 in the
31
# low byte. The second character is THREE-PER-M, U+2004,
32
# which contains 0x20 in the high byte.
35
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf32;
36
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
37
SELECT hex(word) FROM t1 ORDER BY word;
38
SELECT hex(word2) FROM t1 ORDER BY word2;
42
# Check that real spaces are correctly trimmed.
46
(X'000004200000002000000020',X'000004200000002000000020'),
47
(X'000020040000002000000020',X'000020040000002000000020');
48
SELECT hex(word) FROM t1 ORDER BY word;
49
SELECT hex(word2) FROM t1 ORDER BY word2;
55
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0421'));
56
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
57
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
58
SELECT hex(LPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
60
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0421'));
61
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
62
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
63
SELECT hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
65
CREATE TABLE t1 SELECT
66
LPAD(_utf32 X'0420',10,_utf32 X'0421') l,
67
RPAD(_utf32 X'0420',10,_utf32 X'0421') r;
69
select hex(l), hex(r) from t1;
72
create table t1 (f1 char(30));
73
insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
74
select lpad(f1, 12, "-o-/") from t1;
77
######################################################
83
SET character_set_connection=utf32;
84
--source include/ctype_like.inc
87
SET character_set_connection=utf32;
88
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
89
INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
90
INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
91
INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
92
INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
93
SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
94
SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
95
SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
96
SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf32_bin ORDER BY BINARY a;
99
CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
100
ENGINE=MyISAM CHARACTER SET utf32;
101
INSERT INTO t1 (word) VALUES ("cat");
102
SELECT * FROM t1 WHERE word LIKE "c%";
103
SELECT * FROM t1 WHERE word LIKE "ca_";
104
SELECT * FROM t1 WHERE word LIKE "cat";
105
SELECT * FROM t1 WHERE word LIKE _utf32 x'0000006300000025'; # "c%"
106
SELECT * FROM t1 WHERE word LIKE _utf32 x'00000063000000610000005F'; # "ca_"
111
# Check that INSERT() works fine.
112
# This invokes charpos() function.
113
select insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066);
114
select insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066);
116
#######################################################
123
# When using a ucs2 table in MySQL,
124
# either with ucs2_general_ci or ucs2_bin collation,
125
# words are returned in an incorrect order when using ORDER BY
126
# on an _indexed_ CHAR or VARCHAR column. They are sorted with
127
# the longest word *first* instead of last. I.E. The word "aardvark"
128
# is in the results before the word "a".
130
# If there is no index for the column, the problem does not occur.
132
# Interestingly, if there is no second column, the words are returned
133
# in the correct order.
135
# According to EXPLAIN, it looks like when the output includes columns that
136
# are not part of the index sorted on, it does a filesort, which fails.
137
# Using a straight index yields correct results.
147
bar INT(11) default 0,
151
COLLATE utf32_general_ci ;
153
INSERT INTO t1 (word) VALUES ("aar");
154
INSERT INTO t1 (word) VALUES ("a");
155
INSERT INTO t1 (word) VALUES ("aardvar");
156
INSERT INTO t1 (word) VALUES ("aardvark");
157
INSERT INTO t1 (word) VALUES ("aardvara");
158
INSERT INTO t1 (word) VALUES ("aardvarz");
159
EXPLAIN SELECT * FROM t1 ORDER BY word;
160
SELECT * FROM t1 ORDER BY word;
161
EXPLAIN SELECT word FROM t1 ORDER BY word;
162
SELECT word FROM t1 ORDER by word;
175
COLLATE utf32_general_ci;
177
INSERT INTO t1 (word) VALUES ("aar");
178
INSERT INTO t1 (word) VALUES ("a");
179
INSERT INTO t1 (word) VALUES ("aardvar");
180
INSERT INTO t1 (word) VALUES ("aardvark");
181
INSERT INTO t1 (word) VALUES ("aardvara");
182
INSERT INTO t1 (word) VALUES ("aardvarz");
183
EXPLAIN SELECT * FROM t1 ORDER BY WORD;
184
SELECT * FROM t1 ORDER BY word;
189
# Two fields, no index
194
bar INT(11) AUTO_INCREMENT,
198
COLLATE utf32_general_ci ;
199
INSERT INTO t1 (word) VALUES ("aar");
200
INSERT INTO t1 (word) VALUES ("a" );
201
INSERT INTO t1 (word) VALUES ("aardvar");
202
INSERT INTO t1 (word) VALUES ("aardvark");
203
INSERT INTO t1 (word) VALUES ("aardvara");
204
INSERT INTO t1 (word) VALUES ("aardvarz");
205
EXPLAIN SELECT * FROM t1 ORDER BY word;
206
SELECT * FROM t1 ORDER BY word;
207
EXPLAIN SELECT word FROM t1 ORDER BY word;
208
SELECT word FROM t1 ORDER BY word;
212
# END OF Bug 1264 test
214
########################################################
218
# Check alignment for from-binary-conversion with CAST and CONVERT
220
SELECT hex(cast(0xAA as char character set utf32));
221
SELECT hex(convert(0xAA using utf32));
224
# Check alignment for string types
226
CREATE TABLE t1 (a char(10) character set utf32);
227
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
228
SELECT HEX(a) FROM t1;
231
CREATE TABLE t1 (a varchar(10) character set utf32);
232
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
233
SELECT HEX(a) FROM t1;
236
CREATE TABLE t1 (a text character set utf32);
237
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
238
SELECT HEX(a) FROM t1;
241
CREATE TABLE t1 (a mediumtext character set utf32);
242
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
243
SELECT HEX(a) FROM t1;
246
CREATE TABLE t1 (a longtext character set utf32);
247
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
248
SELECT HEX(a) FROM t1;
252
## Bug #5024 Server crashes with queries on fields
253
## with certain charset/collation settings
256
#create table t1 (s1 char character set `ucs2` collate `ucs2_czech_ci`);
257
#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
258
#select s1 from t1 where s1 > 'a' order by s1;
262
# Bug #5081 : UCS2 fields are filled with '0x2020'
263
# after extending field length
265
create table t1(a char(1)) default charset utf32;
266
insert into t1 values ('a'),('b'),('c');
267
alter table t1 modify a char(5);
268
select a, hex(a) from t1;
272
# Check prepare statement from an UTF32 string
275
set @str1 = 'select ?';
276
set @str2 = convert(@str1 using utf32);
277
prepare stmt1 from @str2;
278
execute stmt1 using @ivar;
281
# Check that utf32 works with ENUM and SET type
284
create table t1 (a enum('x','y','z') character set utf32);
285
show create table t1;
286
insert into t1 values ('x');
287
insert into t1 values ('y');
288
insert into t1 values ('z');
289
select a, hex(a) from t1 order by a;
290
alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf32;
291
show create table t1;
292
insert into t1 values ('D');
293
insert into t1 values ('E ');
294
insert into t1 values ('ä');
295
insert into t1 values ('ö');
296
insert into t1 values ('ü');
297
select a, hex(a) from t1 order by a;
300
create table t1 (a set ('x','y','z','ä','ö','ü') character set utf32);
301
show create table t1;
302
insert into t1 values ('x');
303
insert into t1 values ('y');
304
insert into t1 values ('z');
305
insert into t1 values ('x,y');
306
insert into t1 values ('x,y,z,ä,ö,ü');
307
select a, hex(a) from t1 order by a;
311
# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
313
create table t1(a enum('a','b','c')) default character set utf32;
314
insert into t1 values('a'),('b'),('c');
315
alter table t1 add b char(1);
317
select * from t1 order by a;
321
SET collation_connection='utf32_general_ci';
322
-- source include/ctype_filesort.inc
323
-- source include/ctype_like_escape.inc
325
SET collation_connection='utf32_bin';
326
-- source include/ctype_filesort.inc
327
-- source include/ctype_like_escape.inc
330
# Bug#10344 Some string functions fail for UCS2
332
select hex(substr(_utf32 0x000000e4000000e500000068,1));
333
select hex(substr(_utf32 0x000000e4000000e500000068,2));
334
select hex(substr(_utf32 0x000000e4000000e500000068,3));
335
select hex(substr(_utf32 0x000000e4000000e500000068,-1));
336
select hex(substr(_utf32 0x000000e4000000e500000068,-2));
337
select hex(substr(_utf32 0x000000e4000000e500000068,-3));
343
# This bug also helped to find another problem that
344
# INSERT of a UCS2 string containing a negative number
345
# into a unsigned int column didn't produce warnings.
346
# This test covers both problems.
348
#SET collation_connection='ucs2_swedish_ci';
349
#CREATE TABLE t1 (Field1 int(10) default '0');
350
## no warnings, negative numbers are allowed
351
#INSERT INTO t1 VALUES ('-1');
354
#CREATE TABLE t1 (Field1 int(10) unsigned default '0');
355
## this should generate a "Data truncated" warning
356
#INSERT INTO t1 VALUES ('-1');
362
## Bug#18691 Converting number to UNICODE string returns invalid result
364
#SELECT CONVERT(103, CHAR(50) UNICODE);
365
#SELECT CONVERT(103.0, CHAR(50) UNICODE);
366
#SELECT CONVERT(-103, CHAR(50) UNICODE);
367
#SELECT CONVERT(-103.0, CHAR(50) UNICODE);
370
# Bug#9557 MyISAM utf8 table crash
373
a varchar(250) NOT NULL default '',
375
) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE utf32_general_ci;
376
insert into t1 values (0x803d);
377
insert into t1 values (0x005b);
378
select hex(a) from t1;
382
## Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
385
#create table t1(f1 varchar(5) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL) engine=InnoDB;
387
#insert into t1 values('a');
388
#create index t1f1 on t1(f1);
389
#select f1 from t1 where f1 like 'a%';
393
# Bug#9442 Set parameter make query fail if column character set is UCS2
395
create table t1 (utext varchar(20) character set utf32);
396
insert into t1 values ("lily");
397
insert into t1 values ("river");
398
prepare stmt from 'select utext from t1 where utext like ?';
400
execute stmt using @param1;
401
execute stmt using @param1;
402
select utext from t1 where utext like '%%';
404
deallocate prepare stmt;
407
# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
410
a char(10) character set utf32 not null,
413
insert into t1 values (repeat(0x0000201f, 10));
414
insert into t1 values (repeat(0x00002020, 10));
415
insert into t1 values (repeat(0x00002021, 10));
416
# make sure "index read" is used
417
explain select hex(a) from t1 order by a;
418
select hex(a) from t1 order by a;
419
alter table t1 drop index a;
420
select hex(a) from t1 order by a;
424
# Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
425
# over a 'ucs2' field uses a temporary table
427
#CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
428
#INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
429
#SELECT id, MIN(s) FROM t1 GROUP BY id;
433
## Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
437
#drop table if exists bug20536;
441
#create table bug20536 (id bigint not null auto_increment primary key, name
442
#varchar(255) character set ucs2 not null);
443
#insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
444
#select md5(name) from bug20536;
445
#select sha1(name) from bug20536;
446
#select make_set(3, name, upper(name)) from bug20536;
447
#select export_set(5, name, upper(name)) from bug20536;
448
#select export_set(5, name, upper(name), ",", 5) from bug20536;
451
# Bug #20108: corrupted default enum value for a ucs2 field
455
status enum('active','passive') character set utf32 collate utf32_general_ci
456
NOT NULL default 'passive'
458
SHOW CREATE TABLE t1;
459
ALTER TABLE t1 ADD a int NOT NULL AFTER status;
460
SHOW CREATE TABLE t1;
464
# status enum('active','passive') collate ucs2_turkish_ci
465
# NOT NULL default 'passive'
467
#SHOW CREATE TABLE t2;
468
#ALTER TABLE t2 ADD a int NOT NULL AFTER status;
472
## Some broken functions: add these tests just to document current behavior.
474
## PASSWORD and OLD_PASSWORD don't work with UCS2 strings, but to fix it would
475
## not be backwards compatible in all cases, so it's best to leave it alone
476
#select password(name) from bug20536;
477
#select old_password(name) from bug20536;
479
## Disable test case as encrypt relies on 'crypt' function.
480
## "decrypt" is noramlly tested in func_crypt.test which have a
481
## "have_crypt.inc" test
483
## ENCRYPT relies on OS function crypt() which takes a NUL-terminated string; it
484
## doesn't return good results for strings with embedded 0 bytes. It won't be
485
## fixed unless we choose to re-implement the crypt() function ourselves to take
486
## an extra size_t string_length argument.
487
#select encrypt(name, 'SALT') from bug20536;
490
## QUOTE doesn't work with UCS2 data. It would require a total rewrite
491
## of Item_func_quote::val_str(), which isn't worthwhile until UCS2 is
492
## supported fully as a client character set.
493
#select quote(name) from bug20536;
495
#drop table bug20536;
497
--echo End of 4.1 tests
501
# Conversion from an UTF32 string to a decimal column
503
CREATE TABLE t1 (a varchar(64) character set utf32, b decimal(10,3));
504
INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
506
SELECT *, hex(a) FROM t1;
510
# Bug#9442 Set parameter make query fail if column character set is UCS2
512
create table t1 (utext varchar(20) character set utf32);
513
insert into t1 values ("lily");
514
insert into t1 values ("river");
515
prepare stmt from 'select utext from t1 where utext like ?';
517
execute stmt using @param1;
518
execute stmt using @param1;
519
select utext from t1 where utext like '%%';
521
deallocate prepare stmt;
524
# Bug#22638 SOUNDEX broken for international characters
527
set character_set_connection=utf32;
528
select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
529
select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
530
select 'mood' sounds like 'mud';
532
select hex(soundex(_utf32 0x000004100000041100000412));
533
# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
534
select hex(soundex(_utf32 0x000000BF000000C0));
538
# Bug #14290: character_maximum_length for text fields
540
create table t1(a blob, b text charset utf32);
541
select data_type, character_octet_length, character_maximum_length
542
from information_schema.columns where table_name='t1';
545
--echo End of 5.0 tests
547
set collation_connection=utf32_general_ci;
548
--source include/weight_string.inc
549
select hex(weight_string(_utf32 0x10000));
550
select hex(weight_string(_utf32 0x10001));
551
--source include/weight_string_l1.inc
553
set collation_connection=utf32_bin;
554
--source include/weight_string.inc
555
--source include/weight_string_l1.inc
558
set collation_connection=utf32_general_ci;
560
# Testing cs->coll->instr()
562
select position('bb' in 'abba');
565
# Testing cs->coll->hash_sort()
567
create table t1 (a varchar(10) character set utf32) engine=heap;
568
insert into t1 values ('a'),('A'),('b'),('B');
569
select * from t1 where a='a' order by binary a;
570
select hex(min(binary a)),count(*) from t1 group by a;
574
# Testing cs->cset->numchars()
576
select char_length('abcd'), octet_length('abcd');
579
# Testing cs->cset->charpos()
581
select left('abcd',2);
584
# Testing cs->cset->well_formed_length()
586
create table t1 (a varchar(10) character set utf32);
587
insert into t1 values (_utf32 0x0010FFFF);
588
--error ER_INVALID_CHARACTER_STRING
589
insert into t1 values (_utf32 0x00110000);
590
--error ER_INVALID_CHARACTER_STRING
591
insert into t1 values (_utf32 0x00110101);
592
--error ER_INVALID_CHARACTER_STRING
593
insert into t1 values (_utf32 0x01000101);
594
--error ER_INVALID_CHARACTER_STRING
595
insert into t1 values (_utf32 0x11000101);
596
select hex(a) from t1;
600
# Testing cs->cset->lengthsp()
602
create table t1 (a char(10)) character set utf32;
603
insert into t1 values ('a ');
604
select hex(a) from t1;
608
# Testing cs->cset->caseup() and cs->cset->casedn()
610
select upper('abcd'), lower('ABCD');
613
# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf32
614
# Testing cs->cset->snprintf()
616
#create table t1 (a date);
617
#insert into t1 values ('2007-09-16');
622
# Testing cs->cset->l10tostr
623
# !!! Not used in the code
626
# Testing cs->cset->ll10tostr
628
create table t1 (a varchar(10) character set utf32);
629
insert into t1 values (123456);
630
select a, hex(a) from t1;
634
# Testing cs->cset->fill
635
# SOUNDEX fills strings with DIGIT ZERO up to four characters
636
select hex(soundex('a'));
639
# Testing cs->cset->strntol
640
# !!! Not used in the code
643
# Testing cs->cset->strntoul
645
create table t1 (a enum ('a','b','c')) character set utf32;
646
insert into t1 values ('1');
651
# Testing cs->cset->strntoll and cs->cset->strntoull
654
select hex(conv(convert('123' using utf32), -10, 16));
655
select hex(conv(convert('123' using utf32), 10, 16));
658
# Testing cs->cset->strntod
661
set character_set_connection=utf32;
663
select 1.1 + '1.2xxx';
665
# Testing strntoll10_utf32
666
# Testing cs->cset->strtoll10
667
select left('aaa','1');
670
# Testing cs->cset->strntoull10rnd
672
create table t1 (a int);
673
insert into t1 values ('-1234.1e2');
674
insert into t1 values ('-1234.1e2xxxx');
675
insert into t1 values ('-1234.1e2 ');
680
# Testing cs->cset->scan
682
create table t1 (a int);
683
insert into t1 values ('1 ');
684
insert into t1 values ('1 x');
689
# Testing auto-conversion to TEXT
691
create table t1 (a varchar(17000) character set utf32);
692
show create table t1;
696
# Testing that maximim possible key length is 1332 bytes
698
create table t1 (a varchar(250) character set utf32 primary key);
699
show create table t1;
701
--error ER_TOO_LONG_KEY
702
create table t1 (a varchar(334) character set utf32 primary key);
705
# Testing mi_check with long key values
707
create table t1 (a varchar(333) character set utf32, key(a));
708
insert into t1 values (repeat('a',333)), (repeat('b',333));
714
# Test basic regex functionality
716
set collation_connection=utf32_general_ci;
717
--source include/ctype_regex.inc
720
# TODO: add tests for all engines