1
/* Copyright (C) 2000 MySQL AB
3
This library is free software; you can redistribute it and/or
4
modify it under the terms of the GNU Library General Public
5
License as published by the Free Software Foundation; version 2
8
This library is distributed in the hope that it will be useful,
9
but WITHOUT ANY WARRANTY; without even the implied warranty of
10
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
Library General Public License for more details.
13
You should have received a copy of the GNU Library General Public
14
License along with this library; if not, write to the Free
15
Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18
/* UCS2 support. Written by Alexander Barkov <bar@mysql.com> */
20
#include <my_global.h>
28
#ifdef HAVE_CHARSET_ucs2
35
static uchar ctype_ucs2[] = {
37
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
38
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
39
72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
40
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
41
16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
43
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
44
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
45
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
55
static uchar to_lower_ucs2[] = {
56
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
57
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
58
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
59
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
60
64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
61
112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
62
96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
63
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
64
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
65
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
66
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
67
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
68
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
69
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
70
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
71
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
74
static uchar to_upper_ucs2[] = {
75
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
76
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
77
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
78
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
79
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
81
96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
82
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
83
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
84
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
85
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
86
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
87
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
88
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
89
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
90
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
94
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
95
my_wc_t * pwc, const uchar *s, const uchar *e)
97
if (s+2 > e) /* Need 2 characters */
98
return MY_CS_TOOSMALL2;
100
*pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
104
static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
105
my_wc_t wc, uchar *r, uchar *e)
108
return MY_CS_TOOSMALL2;
110
r[0]= (uchar) (wc >> 8);
111
r[1]= (uchar) (wc & 0xFF);
116
/*
  Convert a UCS-2 buffer to upper case in place.

  cs       character set handler; cs->caseinfo supplies per-plane case tables
  src      buffer to convert (also the destination)
  srclen   length of src in bytes
  dst      must equal src (asserted)
  dstlen   must equal srclen (asserted)

  Each character is decoded, mapped through the plane's toupper entry when
  the plane has a table, and written back over itself.  Conversion stops at
  the first character that cannot be decoded or re-encoded.

  Returns srclen.
*/
static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
                             char *dst __attribute__((unused)),
                             size_t dstlen __attribute__((unused)))
{
  my_wc_t wc;
  int res;
  char *srcend= src + srclen;
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
  DBUG_ASSERT(src == dst && srclen == dstlen);

  while ((src < srcend) &&
         (res= my_ucs2_uni(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
  {
    int plane= (wc >> 8) & 0xFF;
    wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
    /* The re-encoded character must occupy exactly the bytes it came from */
    if (res != my_uni_ucs2(cs, wc, (uchar *) src, (uchar *) srcend))
      break;
    src+= res;
  }
  return srclen;
}
139
static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
140
ulong *n1, ulong *n2)
144
const uchar *e=s+slen;
145
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
147
while (e > s+1 && e[-1] == ' ' && e[-2] == '\0')
150
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
152
int plane = (wc>>8) & 0xFF;
153
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
154
n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
156
n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
163
static size_t my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
164
char * s __attribute__((unused)))
170
/*
  Convert a UCS-2 buffer to lower case in place.

  cs       character set handler; cs->caseinfo supplies per-plane case tables
  src      buffer to convert (also the destination)
  srclen   length of src in bytes
  dst      must equal src (asserted)
  dstlen   must equal srclen (asserted)

  Each character is decoded, mapped through the plane's tolower entry when
  the plane has a table, and written back over itself.  Conversion stops at
  the first character that cannot be decoded or re-encoded.

  Returns srclen.
*/
static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
                             char *dst __attribute__((unused)),
                             size_t dstlen __attribute__((unused)))
{
  my_wc_t wc;
  int res;
  char *srcend= src + srclen;
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
  DBUG_ASSERT(src == dst && srclen == dstlen);

  while ((src < srcend) &&
         (res= my_ucs2_uni(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
  {
    int plane= (wc >> 8) & 0xFF;
    wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
    /* The re-encoded character must occupy exactly the bytes it came from */
    if (res != my_uni_ucs2(cs, wc, (uchar *) src, (uchar *) srcend))
      break;
    src+= res;
  }
  return srclen;
}
193
static size_t my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
194
char * s __attribute__((unused)))
200
static int my_strnncoll_ucs2(CHARSET_INFO *cs,
201
const uchar *s, size_t slen,
202
const uchar *t, size_t tlen,
206
my_wc_t UNINIT_VAR(s_wc),t_wc;
207
const uchar *se=s+slen;
208
const uchar *te=t+tlen;
209
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
211
while ( s < se && t < te )
214
s_res=my_ucs2_uni(cs,&s_wc, s, se);
215
t_res=my_ucs2_uni(cs,&t_wc, t, te);
217
if ( s_res <= 0 || t_res <= 0 )
219
/* Incorrect string, compare by char value */
220
return ((int)s[0]-(int)t[0]);
223
plane=(s_wc>>8) & 0xFF;
224
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
225
plane=(t_wc>>8) & 0xFF;
226
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
229
return s_wc > t_wc ? 1 : -1;
235
return (int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
239
Compare strings, discarding end space
242
my_strnncollsp_ucs2()
243
cs character set handler
244
a First string to compare
245
a_length Length of 'a'
246
b Second string to compare
247
b_length Length of 'b'
250
If one string is shorter than the other, then we space extend the shorter one
251
so that the strings have equal length.
253
This will ensure that the following things hold:
265
static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
266
const uchar *s, size_t slen,
267
const uchar *t, size_t tlen,
268
my_bool diff_if_only_endspace_difference
269
__attribute__((unused)))
271
const uchar *se, *te;
273
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
275
/* extra safety to make sure the lengths are even numbers */
282
for (minlen= min(slen, tlen); minlen; minlen-= 2)
284
int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort :
285
(((int) s[0]) << 8) + (int) s[1];
287
int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort :
288
(((int) t[0]) << 8) + (int) t[1];
290
return s_wc > t_wc ? 1 : -1;
306
for ( ; s < se ; s+= 2)
308
if (s[0] || s[1] != ' ')
309
return (s[0] == 0 && s[1] < ' ') ? -swap : swap;
316
static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
317
const char *s, const char *t, size_t len)
320
my_wc_t UNINIT_VAR(s_wc),t_wc;
321
const char *se=s+len;
322
const char *te=t+len;
323
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
325
while ( s < se && t < te )
329
s_res=my_ucs2_uni(cs,&s_wc, (const uchar*)s, (const uchar*)se);
330
t_res=my_ucs2_uni(cs,&t_wc, (const uchar*)t, (const uchar*)te);
332
if ( s_res <= 0 || t_res <= 0 )
334
/* Incorrect string, compare by char value */
335
return ((int)s[0]-(int)t[0]);
338
plane=(s_wc>>8) & 0xFF;
339
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].tolower : s_wc;
341
plane=(t_wc>>8) & 0xFF;
342
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].tolower : t_wc;
345
return ((int) s_wc) - ((int) t_wc);
350
return (int) ( (se-s) - (te-t) );
354
/*
  Case-insensitive comparison of two strings whose lengths are not supplied.

  cs    character set handler, passed through to my_strncasecmp_ucs2()
  s, t  strings to compare

  The lengths are measured with strlen() and the longer of the two is used
  as the byte count for my_strncasecmp_ucs2().

  NOTE(review): strlen() stops at the first zero byte, and the UCS-2 form of
  any character below U+0100 starts with one, so the measured lengths are
  probably shorter than the real data -- confirm how callers use this entry.

  Returns whatever my_strncasecmp_ucs2() returns.
*/
static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
{
  size_t s_len= strlen(s);
  size_t t_len= strlen(t);
  size_t len= (s_len > t_len) ? s_len : t_len;
  return my_strncasecmp_ucs2(cs, s, t, len);
}
363
static size_t my_strnxfrm_ucs2(CHARSET_INFO *cs,
364
uchar *dst, size_t dstlen, const uchar *src,
370
uchar *de = dst + dstlen;
371
const uchar *se = src + srclen;
372
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
374
while( src < se && dst < de )
376
if ((res=my_ucs2_uni(cs,&wc, src, se))<0)
383
plane=(wc>>8) & 0xFF;
384
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
386
if ((res=my_uni_ucs2(cs,wc,dst,de)) <0)
393
cs->cset->fill(cs, (char*) dst, (size_t) (de - dst), ' ');
398
static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
399
const char *b __attribute__((unused)),
400
const char *e __attribute__((unused)))
406
static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
407
uint c __attribute__((unused)))
413
static int my_vsnprintf_ucs2(char *dst, size_t n, const char* fmt, va_list ap)
415
char *start=dst, *end=dst+n-1;
420
if (dst == end) /* End of buffer */
423
*dst++='\0'; *dst++= *fmt; /* Copy ordinary char */
429
/* Skip if max size is used (to be compatible with printf) */
430
while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
436
if (*fmt == 's') /* String parameter */
438
reg2 char *par = va_arg(ap, char *);
440
size_t left_len = (size_t)(end-dst);
441
if (!par) par = (char*)"(null)";
443
if (left_len <= plen*2)
444
plen = left_len/2 - 1;
446
for ( ; plen ; plen--, dst+=2, par++)
453
else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
459
if ((size_t) (end-dst) < 32)
461
iarg = va_arg(ap, int);
463
int10_to_str((long) iarg, nbuf, -10);
465
int10_to_str((long) (uint) iarg,nbuf,10);
467
for (; pbuf[0]; pbuf++)
475
/* We come here on '%%', unknown code or too long parameter */
479
*dst++='%'; /* % used as % or unknown code */
482
DBUG_ASSERT(dst <= end);
483
*dst='\0'; /* End of errmessage */
484
return (size_t) (dst - start);
487
/*
  printf-style formatting into a buffer; variadic front end for
  my_vsnprintf_ucs2().

  cs    character set handler (unused)
  to    destination buffer
  n     size of the destination buffer
  fmt   format string
  ...   arguments consumed according to fmt

  Returns the value produced by my_vsnprintf_ucs2().
*/
static size_t my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                               char* to, size_t n, const char* fmt, ...)
{
  va_list args;
  size_t written;

  va_start(args, fmt);
  written= my_vsnprintf_ucs2(to, n, fmt, args);
  va_end(args);                           /* every va_start needs a va_end */
  return written;
}
496
long my_strntol_ucs2(CHARSET_INFO *cs,
497
const char *nptr, size_t l, int base,
498
char **endptr, int *err)
504
register unsigned int cutlim;
505
register uint32 cutoff;
507
register const uchar *s= (const uchar*) nptr;
508
register const uchar *e= (const uchar*) nptr+l;
514
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
520
case '-' : negative= !negative; break;
525
else /* No more characters or bad multibyte sequence */
529
err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
538
if (base <= 0 || base == 1 || base > 36)
545
cutoff = ((uint32)~0L) / (uint32) base;
546
cutlim = (uint) (((uint32)~0L) % (uint32) base);
549
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
552
if ( wc>='0' && wc<='9')
554
else if ( wc>='A' && wc<='Z')
556
else if ( wc>='a' && wc<='z')
562
if (res > cutoff || (res == cutoff && wc > cutlim))
566
res *= (uint32) base;
570
else if (cnv==MY_CS_ILSEQ)
579
/* No more characters */
585
*endptr = (char *) s;
595
if (res > (uint32) INT_MIN32)
598
else if (res > INT_MAX32)
604
return negative ? INT_MIN32 : INT_MAX32;
607
return (negative ? -((long) res) : (long) res);
611
ulong my_strntoul_ucs2(CHARSET_INFO *cs,
612
const char *nptr, size_t l, int base,
613
char **endptr, int *err)
619
register unsigned int cutlim;
620
register uint32 cutoff;
622
register const uchar *s= (const uchar*) nptr;
623
register const uchar *e= (const uchar*) nptr+l;
629
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
635
case '-' : negative= !negative; break;
640
else /* No more characters or bad multibyte sequence */
644
err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
653
if (base <= 0 || base == 1 || base > 36)
660
cutoff = ((uint32)~0L) / (uint32) base;
661
cutlim = (uint) (((uint32)~0L) % (uint32) base);
665
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
668
if ( wc>='0' && wc<='9')
670
else if ( wc>='A' && wc<='Z')
672
else if ( wc>='a' && wc<='z')
678
if (res > cutoff || (res == cutoff && wc > cutlim))
682
res *= (uint32) base;
686
else if (cnv==MY_CS_ILSEQ)
695
/* No more characters */
701
*endptr = (char *) s;
712
return (~(uint32) 0);
715
return (negative ? -((long) res) : (long) res);
720
longlong my_strntoll_ucs2(CHARSET_INFO *cs,
721
const char *nptr, size_t l, int base,
722
char **endptr, int *err)
728
register ulonglong cutoff;
729
register unsigned int cutlim;
730
register ulonglong res;
731
register const uchar *s= (const uchar*) nptr;
732
register const uchar *e= (const uchar*) nptr+l;
738
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
744
case '-' : negative= !negative; break;
749
else /* No more characters or bad multibyte sequence */
753
err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
762
if (base <= 0 || base == 1 || base > 36)
769
cutoff = (~(ulonglong) 0) / (unsigned long int) base;
770
cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
773
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
776
if ( wc>='0' && wc<='9')
778
else if ( wc>='A' && wc<='Z')
780
else if ( wc>='a' && wc<='z')
786
if (res > cutoff || (res == cutoff && wc > cutlim))
790
res *= (ulonglong) base;
794
else if (cnv==MY_CS_ILSEQ)
803
/* No more characters */
809
*endptr = (char *) s;
819
if (res > (ulonglong) LONGLONG_MIN)
822
else if (res > (ulonglong) LONGLONG_MAX)
828
return negative ? LONGLONG_MIN : LONGLONG_MAX;
831
return (negative ? -((longlong)res) : (longlong)res);
837
ulonglong my_strntoull_ucs2(CHARSET_INFO *cs,
838
const char *nptr, size_t l, int base,
839
char **endptr, int *err)
845
register ulonglong cutoff;
846
register unsigned int cutlim;
847
register ulonglong res;
848
register const uchar *s= (const uchar*) nptr;
849
register const uchar *e= (const uchar*) nptr+l;
855
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
861
case '-' : negative= !negative; break;
866
else /* No more characters or bad multibyte sequence */
870
err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
879
if (base <= 0 || base == 1 || base > 36)
886
cutoff = (~(ulonglong) 0) / (unsigned long int) base;
887
cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
891
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
894
if ( wc>='0' && wc<='9')
896
else if ( wc>='A' && wc<='Z')
898
else if ( wc>='a' && wc<='z')
904
if (res > cutoff || (res == cutoff && wc > cutlim))
908
res *= (ulonglong) base;
912
else if (cnv==MY_CS_ILSEQ)
921
/* No more characters */
927
*endptr = (char *) s;
938
return (~(ulonglong) 0);
941
return (negative ? -((longlong) res) : (longlong) res);
945
double my_strntod_ucs2(CHARSET_INFO *cs __attribute__((unused)),
946
char *nptr, size_t length,
947
char **endptr, int *err)
951
register char *b=buf;
952
register const uchar *s= (const uchar*) nptr;
958
/* Cut too long strings */
959
if (length >= sizeof(buf))
960
length= sizeof(buf)-1;
963
while ((cnv=cs->cset->mb_wc(cs,&wc,s,end)) > 0)
966
if (wc > (int) (uchar) 'e' || !wc)
967
break; /* Can't be part of double */
972
res= my_strtod(buf, endptr, err);
973
*endptr= nptr + (size_t) (*endptr- buf);
978
ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs __attribute__((unused)),
979
const char *nptr, size_t length,
981
char **endptr, int *err)
983
char buf[256], *b= buf;
985
const uchar *end, *s= (const uchar*) nptr;
989
/* Cut too long strings */
990
if (length >= sizeof(buf))
991
length= sizeof(buf)-1;
994
while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
997
if (wc > (int) (uchar) 'e' || !wc)
998
break; /* Can't be a number part */
1002
res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
1003
*endptr= (char*) nptr + 2 * (size_t) (*endptr- buf);
1009
This is a fast version optimized for the case of radix 10 / -10
1012
size_t my_l10tostr_ucs2(CHARSET_INFO *cs,
1013
char *dst, size_t len, int radix, long int val)
1016
register char *p, *db, *de;
1019
unsigned long int uval = (unsigned long int) val;
1021
p = &buffer[sizeof(buffer)-1];
1029
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
1030
uval = (unsigned long int)0 - uval;
1034
new_val = (long) (uval / 10);
1035
*--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
1041
*--p = '0' + (char) (val-new_val*10);
1050
for ( db=dst, de=dst+len ; (dst<de) && *p ; p++)
1052
int cnvres=cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de);
1058
return (int) (dst-db);
1062
size_t my_ll10tostr_ucs2(CHARSET_INFO *cs __attribute__((unused)),
1063
char *dst, size_t len, int radix, longlong val)
1066
register char *p, *db, *de;
1069
ulonglong uval= (ulonglong) val;
1076
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
1077
uval = (ulonglong)0 - uval;
1081
p = &buffer[sizeof(buffer)-1];
1090
while (uval > (ulonglong) LONG_MAX)
1092
ulonglong quo= uval/(uint) 10;
1093
uint rem= (uint) (uval- quo* (uint) 10);
1098
long_val= (long) uval;
1099
while (long_val != 0)
1101
long quo= long_val/10;
1102
*--p = (char) ('0' + (long_val - quo*10));
1112
for ( db=dst, de=dst+len ; (dst<de) && *p ; p++)
1114
int cnvres=cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
1120
return (int) (dst-db);
1124
#undef ULONGLONG_MAX
1125
#define ULONGLONG_MAX (~(ulonglong) 0)
1126
#define MAX_NEGATIVE_NUMBER ((ulonglong) LL(0x8000000000000000))
1128
#define LFACTOR ULL(1000000000)
1129
#define LFACTOR1 ULL(10000000000)
1130
#define LFACTOR2 ULL(100000000000)
1132
/* Powers of ten, 10^0 .. 10^8, indexed by exponent. */
static const unsigned long lfactor[9]=
{
  1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L
};
1138
longlong my_strtoll10_ucs2(CHARSET_INFO *cs __attribute__((unused)),
1139
const char *nptr, char **endptr, int *error)
1141
const char *s, *end, *start, *n_end, *true_end;
1143
unsigned long i, j, k;
1146
ulong cutoff, cutoff2, cutoff3;
1149
/* If fixed length string */
1152
/* Make sure string length is even */
1153
end= s + ((*endptr - s) / 2) * 2;
1154
while (s < end && !s[0] && (s[1] == ' ' || s[1] == '\t'))
1161
/* We don't support null terminated strings in UCS2 */
1165
/* Check for a sign. */
1167
if (!s[0] && s[1] == '-')
1169
*error= -1; /* Mark as negative number */
1174
cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
1175
cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
1176
cutoff3= MAX_NEGATIVE_NUMBER % 100;
1181
if (!s[0] && s[1] == '+')
1187
cutoff= ULONGLONG_MAX / LFACTOR2;
1188
cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
1189
cutoff3= ULONGLONG_MAX % 100;
1192
/* Handle case where we have a lot of pre-zero */
1193
if (!s[0] && s[1] == '0')
1200
goto end_i; /* Return 0 */
1202
while (!s[0] && s[1] == '0');
1203
n_end= s + 2 * INIT_CNT;
1207
/* Read first digit to check that it's a valid number */
1208
if (s[0] || (c= (s[1]-'0')) > 9)
1212
n_end= s + 2 * (INIT_CNT-1);
1215
/* Handle first 9 digits and store them in i */
1218
for (; s != n_end ; s+= 2)
1220
if (s[0] || (c= (s[1]-'0')) > 9)
1227
/* Handle next 9 digits and store them in j */
1229
start= s; /* Used to know how much to shift i */
1230
n_end= true_end= s + 2 * INIT_CNT;
1235
if (s[0] || (c= (s[1]-'0')) > 9)
1239
} while (s != n_end);
1246
if (s[0] || (c= (s[1]-'0')) > 9)
1249
/* Handle the next 1 or 2 digits and store them in k */
1252
if (s == end || s[0] || (c= (s[1]-'0')) > 9)
1258
/* number string should have ended here */
1259
if (s != end && !s[0] && (c= (s[1]-'0')) <= 9)
1262
/* Check that we didn't get an overflow with the last digit */
1263
if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
1266
li=i*LFACTOR2+ (ulonglong) j*100 + k;
1267
return (longlong) li;
1269
overflow: /* *endptr is set here */
1270
*error= MY_ERRNO_ERANGE;
1271
return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
1275
return (negative ? ((longlong) -(long) i) : (longlong) i);
1278
li= (ulonglong) i * lfactor[(size_t) (s-start) / 2] + j;
1280
return (negative ? -((longlong) li) : (longlong) li);
1283
li=(ulonglong) i*LFACTOR+ (ulonglong) j;
1285
return (negative ? -((longlong) li) : (longlong) li);
1288
li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
1292
if (li > MAX_NEGATIVE_NUMBER)
1294
return -((longlong) li);
1296
return (longlong) li;
1299
/* There was no number to convert. */
1300
*error= MY_ERRNO_EDOM;
1301
*endptr= (char *) nptr;
1307
/*
  Count the characters in a byte range.

  cs    character set handler (unused)
  b     start of the range
  e     end of the range (one past the last byte)

  Returns the byte length divided by two; a trailing odd byte is not
  counted.
*/
size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                        const char *b, const char *e)
{
  return (size_t) (e - b) / 2;
}
1315
/*
  Translate a character position into a byte offset.

  cs    character set handler (unused)
  b     start of the string
  e     end of the string (one past the last byte)
  pos   character position

  Returns pos * 2 when that offset lies within the string, otherwise the
  string's byte length plus 2.

  The range test is done in characters (string_length / 2): pos counts
  characters while string_length counts bytes, and comparing them directly
  treats positions past the end as valid.  Dividing instead of multiplying
  also keeps the test free of overflow.
*/
size_t my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                       const char *b __attribute__((unused)),
                       const char *e __attribute__((unused)),
                       size_t pos)
{
  size_t string_length= (size_t) (e - b);
  return pos > string_length / 2 ? string_length + 2 : pos * 2;
}
1326
/*
  Length in bytes of the leading part of a string that holds at most
  nchars whole characters.

  cs      character set handler (unused)
  b       start of the string
  e       end of the string (one past the last byte)
  nchars  maximum number of characters wanted
  error   set to 0

  Returns the smaller of the even-rounded byte length of the string and
  nchars * 2.
*/
size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                               const char *b, const char *e,
                               size_t nchars, int *error)
{
  /* Ensure string length is divisible by 2 */
  size_t nbytes= ((size_t) (e - b)) & ~(size_t) 1;
  *error= 0;
  nchars*= 2;                             /* characters -> bytes */
  return min(nbytes, nchars);
}
1339
/*
  Fill a buffer with one repeated two-byte character.

  cs    character set handler (unused)
  s     buffer to fill
  l     buffer length in bytes
  fill  value stored in the low byte of every character; the high byte is
        always 0

  Whole two-byte units only are written; a trailing odd byte is left
  untouched.
*/
void my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                  char *s, size_t l, int fill)
{
  while (l >= 2)
  {
    s[0]= 0;
    s[1]= fill;
    s+= 2;
    l-= 2;
  }
}
1347
/*
  Length of a string with trailing spaces removed.

  cs      character set handler (unused)
  ptr     start of the string
  length  length of the string in bytes

  Returns the byte length that remains after dropping every trailing
  two-byte space (0x00 0x20).
*/
size_t my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                        const char *ptr, size_t length)
{
  const char *end= ptr + length;
  while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0')
    end-= 2;
  return (size_t) (end - ptr);
}
1358
/*
  Wildcard comparison using the character set's case tables.

  cs                character set handler; cs->caseinfo is handed to the
                    generic matcher
  str, str_end      string to test
  wildstr, wildend  pattern
  escape            escape character
  w_one             wildcard matching one character
  w_many            wildcard matching any run of characters

  Returns whatever my_wildcmp_unicode() returns.
*/
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
                       const char *str, const char *str_end,
                       const char *wildstr, const char *wildend,
                       int escape, int w_one, int w_many)
{
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, uni_plane);
}
1370
/*
  Wildcard comparison without case tables.

  cs                character set handler
  str, str_end      string to test
  wildstr, wildend  pattern
  escape            escape character
  w_one             wildcard matching one character
  w_many            wildcard matching any run of characters

  Calls my_wildcmp_unicode() with a NULL case-table argument and returns
  its result.
*/
int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
                        const char *str, const char *str_end,
                        const char *wildstr, const char *wildend,
                        int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, NULL);
}
1381
int my_strnncoll_ucs2_bin(CHARSET_INFO *cs,
1382
const uchar *s, size_t slen,
1383
const uchar *t, size_t tlen,
1384
my_bool t_is_prefix)
1387
my_wc_t UNINIT_VAR(s_wc),t_wc;
1388
const uchar *se=s+slen;
1389
const uchar *te=t+tlen;
1391
while ( s < se && t < te )
1393
s_res=my_ucs2_uni(cs,&s_wc, s, se);
1394
t_res=my_ucs2_uni(cs,&t_wc, t, te);
1396
if ( s_res <= 0 || t_res <= 0 )
1398
/* Incorrect string, compare by char value */
1399
return ((int)s[0]-(int)t[0]);
1403
return s_wc > t_wc ? 1 : -1;
1409
return (int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
1412
static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
1413
const uchar *s, size_t slen,
1414
const uchar *t, size_t tlen,
1415
my_bool diff_if_only_endspace_difference
1416
__attribute__((unused)))
1418
const uchar *se, *te;
1421
/* extra safety to make sure the lengths are even numbers */
1422
slen= (slen >> 1) << 1;
1423
tlen= (tlen >> 1) << 1;
1428
for (minlen= min(slen, tlen); minlen; minlen-= 2)
1430
int s_wc= s[0] * 256 + s[1];
1431
int t_wc= t[0] * 256 + t[1];
1433
return s_wc > t_wc ? 1 : -1;
1449
for ( ; s < se ; s+= 2)
1451
if (s[0] || s[1] != ' ')
1452
return (s[0] == 0 && s[1] < ' ') ? -swap : swap;
1460
/*
  Comparison of two strings whose lengths are not supplied, for the binary
  collation.

  cs    character set handler, passed through to my_strncasecmp_ucs2()
  s, t  strings to compare

  The lengths are measured with strlen() and the longer of the two is used
  as the byte count for my_strncasecmp_ucs2().

  NOTE(review): strlen() stops at the first zero byte, and the UCS-2 form of
  any character below U+0100 starts with one, so the measured lengths are
  probably shorter than the real data -- confirm how callers use this entry.

  Returns whatever my_strncasecmp_ucs2() returns.
*/
int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t)
{
  size_t s_len= strlen(s);
  size_t t_len= strlen(t);
  size_t len= (s_len > t_len) ? s_len : t_len;
  return my_strncasecmp_ucs2(cs, s, t, len);
}
1470
size_t my_strnxfrm_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
1471
uchar *dst, size_t dstlen,
1472
const uchar *src, size_t srclen)
1475
memcpy(dst,src,srclen= min(dstlen,srclen));
1476
if (dstlen > srclen)
1477
cs->cset->fill(cs, (char*) dst + srclen, dstlen - srclen, ' ');
1483
void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
1484
const uchar *key, size_t len,ulong *nr1, ulong *nr2)
1486
const uchar *pos = key;
1490
while (key > pos+1 && key[-1] == ' ' && key[-2] == '\0')
1493
for (; pos < (uchar*) key ; pos++)
1495
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
1496
((uint)*pos)) + (nr1[0] << 8);
1502
** Calculate min_str and max_str that ranges a LIKE string.
1504
** ptr Pointer to LIKE string.
1505
** ptr_length Length of LIKE string.
1506
** escape Escape character in LIKE. (Normally '\').
1507
** All escape characters should be removed from min_str and max_str
1508
** res_length Length of min_str and max_str.
1509
** min_str Smallest case sensitive string that ranges LIKE.
1510
** Should be space padded to res_length.
1511
** max_str Largest case sensitive string that ranges LIKE.
1512
** Normally padded with the biggest character sort value.
1514
** The function should return 0 if ok and 1 if the LIKE string can't be
1518
my_bool my_like_range_ucs2(CHARSET_INFO *cs,
1519
const char *ptr, size_t ptr_length,
1520
pbool escape, pbool w_one, pbool w_many,
1522
char *min_str,char *max_str,
1523
size_t *min_length,size_t *max_length)
1525
const char *end=ptr+ptr_length;
1526
char *min_org=min_str;
1527
char *min_end=min_str+res_length;
1528
size_t charlen= res_length / cs->mbmaxlen;
1529
const char *contraction_flags= cs->contractions ?
1530
((const char*) cs->contractions) + 0x40*0x40 : NULL;
1532
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
1533
; ptr+=2, charlen--)
1535
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
1537
ptr+=2; /* Skip escape */
1538
*min_str++= *max_str++ = ptr[0];
1539
*min_str++= *max_str++ = ptr[1];
1542
if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */
1544
*min_str++= (char) (cs->min_sort_char >> 8);
1545
*min_str++= (char) (cs->min_sort_char & 255);
1546
*max_str++= (char) (cs->max_sort_char >> 8);
1547
*max_str++= (char) (cs->max_sort_char & 255);
1550
if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */
1554
Calculate length of keys:
1555
'a\0\0... is the smallest possible string when we have space expand
1556
a\ff\ff... is the biggest possible string
1558
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
1560
*max_length= res_length;
1564
*max_str++ = (char) (cs->max_sort_char >> 8);
1565
*max_str++ = (char) (cs->max_sort_char & 255);
1566
} while (min_str + 1 < min_end);
1570
if (contraction_flags && ptr + 3 < end &&
1571
ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
1573
/* Contraction head found */
1574
if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
1576
/* Contraction head followed by a wildcard, quit */
1577
goto fill_max_and_min;
1581
Check if the second letter can be contraction part,
1582
and if two letters really produce a contraction.
1584
if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
1585
cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
1587
/* Contraction found */
1588
if (charlen == 1 || min_str + 2 >= min_end)
1590
/* Full contraction doesn't fit, quit */
1591
goto fill_max_and_min;
1594
/* Put contraction head */
1595
*min_str++= *max_str++= *ptr++;
1596
*min_str++= *max_str++= *ptr++;
1600
/* Put contraction tail, or a single character */
1601
*min_str++= *max_str++ = ptr[0];
1602
*min_str++= *max_str++ = ptr[1];
1605
*min_length= *max_length = (size_t) (min_str - min_org);
1606
while (min_str + 1 < min_end)
1608
*min_str++ = *max_str++ = '\0';
1609
*min_str++ = *max_str++ = ' '; /* Because if key compression */
1616
size_t my_scan_ucs2(CHARSET_INFO *cs __attribute__((unused)),
1617
const char *str, const char *end, int sequence_type)
1619
const char *str0= str;
1620
end--; /* for easier loop condition, because of two bytes per character */
1622
switch (sequence_type)
1625
for ( ; str < end; str+= 2)
1627
if (str[0] != '\0' || str[1] != ' ')
1630
return (size_t) (str - str0);
1638
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
1642
my_strnncollsp_ucs2,
1644
my_strnxfrmlen_simple,
1654
static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
1657
my_strnncoll_ucs2_bin,
1658
my_strnncollsp_ucs2_bin,
1659
my_strnxfrm_ucs2_bin,
1660
my_strnxfrmlen_simple,
1662
my_wildcmp_ucs2_bin,
1663
my_strcasecmp_ucs2_bin,
1665
my_hash_sort_ucs2_bin,
1670
MY_CHARSET_HANDLER my_charset_ucs2_handler=
1673
my_ismbchar_ucs2, /* ismbchar */
1674
my_mbcharlen_ucs2, /* mbcharlen */
1677
my_well_formed_len_ucs2,
1680
my_ucs2_uni, /* mb_wc */
1681
my_uni_ucs2, /* wc_mb */
1697
my_strntoull10rnd_ucs2,
1702
CHARSET_INFO my_charset_ucs2_general_ci=
1704
35,0,0, /* number */
1705
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE,
1706
"ucs2", /* cs name */
1707
"ucs2_general_ci", /* name */
1709
NULL, /* tailoring */
1710
ctype_ucs2, /* ctype */
1711
to_lower_ucs2, /* to_lower */
1712
to_upper_ucs2, /* to_upper */
1713
to_upper_ucs2, /* sort_order */
1714
NULL, /* contractions */
1715
NULL, /* sort_order_big*/
1716
NULL, /* tab_to_uni */
1717
NULL, /* tab_from_uni */
1718
my_unicase_default, /* caseinfo */
1719
NULL, /* state_map */
1720
NULL, /* ident_map */
1721
1, /* strxfrm_multiply */
1722
1, /* caseup_multiply */
1723
1, /* casedn_multiply */
1726
0, /* min_sort_char */
1727
0xFFFF, /* max_sort_char */
1729
0, /* escape_with_backslash_is_dangerous */
1730
&my_charset_ucs2_handler,
1731
&my_collation_ucs2_general_ci_handler
1734
CHARSET_INFO my_charset_ucs2_bin=
1736
90,0,0, /* number */
1737
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE,
1738
"ucs2", /* cs name */
1739
"ucs2_bin", /* name */
1741
NULL, /* tailoring */
1742
ctype_ucs2, /* ctype */
1743
to_lower_ucs2, /* to_lower */
1744
to_upper_ucs2, /* to_upper */
1745
NULL, /* sort_order */
1746
NULL, /* contractions */
1747
NULL, /* sort_order_big*/
1748
NULL, /* tab_to_uni */
1749
NULL, /* tab_from_uni */
1750
my_unicase_default, /* caseinfo */
1751
NULL, /* state_map */
1752
NULL, /* ident_map */
1753
1, /* strxfrm_multiply */
1754
1, /* caseup_multiply */
1755
1, /* casedn_multiply */
1758
0, /* min_sort_char */
1759
0xFFFF, /* max_sort_char */
1761
0, /* escape_with_backslash_is_dangerous */
1762
&my_charset_ucs2_handler,
1763
&my_collation_ucs2_bin_handler