1
/* Copyright (C) 2002 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
#include <my_global.h>
19
#include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */
25
Returns the number of bytes required for strnxfrm().
28
/*
  Compute the sort-key buffer size for a source string of 'len' bytes.

  cs   Character set handler; supplies the strxfrm_multiply factor
  len  Source length in bytes

  A strxfrm_multiply of 0 is treated as 1, so the multiplier is never zero.
*/
size_t my_strnxfrmlen_simple(CHARSET_INFO *cs, size_t len)
{
  return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
}
35
Converts a string into its sort key.
42
The my_strxfrm_xxx() function transforms a string pointed to by
43
'src' with length 'srclen' according to the charset+collation
44
pair 'cs' and copies the result key into 'dest'.
46
Comparing two strings using memcmp() after my_strnxfrm_xxx()
47
is equal to comparing two original strings with my_strnncollsp_xxx().
49
Not more than 'dstlen' bytes are written into 'dst'.
50
To guarantee that the whole string is transformed, 'dstlen' must be
51
at least srclen*cs->strxfrm_multiply bytes long. Otherwise,
52
a subsequent memcmp() may return an inaccurate result.
54
If the source string is too short to fill whole 'dstlen' bytes,
55
then the 'dest' string is padded up to 'dstlen', ensuring that:
61
my_strnxfrm_simple() is implemented for 8bit charsets and
62
simple collations with one-to-one string->key transformation.
64
See also implementations for various charsets/collations in
65
other ctype-xxx.c files.
74
size_t my_strnxfrm_simple(CHARSET_INFO * cs,
75
uchar *dst, size_t dstlen, uint nweights,
76
const uchar *src, size_t srclen, uint flags)
78
uchar *map= cs->sort_order;
81
if ((frmlen= min(dstlen, nweights)) > srclen)
86
for (end= src + frmlen; src < end;)
92
for (end= dst + frmlen; dst < end; dst++)
93
*dst= map[(uchar) *dst];
95
return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen,
96
nweights - frmlen, flags, 0);
100
int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen,
101
const uchar *t, size_t tlen,
104
size_t len = ( slen > tlen ) ? tlen : slen;
105
uchar *map= cs->sort_order;
106
if (t_is_prefix && slen > tlen)
110
if (map[*s++] != map[*t++])
111
return ((int) map[s[-1]] - (int) map[t[-1]]);
114
We can't use (slen - tlen) here as the result may be outside of the
115
precision of a signed int
117
return slen > tlen ? 1 : slen < tlen ? -1 : 0 ;
122
Compare strings, discarding end space
125
my_strnncollsp_simple()
126
cs character set handler
127
a First string to compare
128
a_length Length of 'a'
129
b Second string to compare
130
b_length Length of 'b'
131
diff_if_only_endspace_difference
132
Set to 1 if the strings should be regarded as different
133
if they differ only in end space
136
If one string is shorter than the other, then we space extend the other
137
so that the strings have equal length.
139
This will ensure that the following things hold:
151
int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
152
const uchar *b, size_t b_length,
153
my_bool diff_if_only_endspace_difference)
155
const uchar *map= cs->sort_order, *end;
159
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
160
diff_if_only_endspace_difference= 0;
163
end= a + (length= min(a_length, b_length));
166
if (map[*a++] != map[*b++])
167
return ((int) map[a[-1]] - (int) map[b[-1]]);
170
if (a_length != b_length)
173
if (diff_if_only_endspace_difference)
174
res= 1; /* Assume 'a' is bigger */
176
Check the next not space character of the longer key. If it's < ' ',
177
then it's smaller than the other key.
179
if (a_length < b_length)
181
/* put shorter key in s */
184
swap= -1; /* swap sign of result */
187
for (end= a + a_length-length; a < end ; a++)
190
return (map[*a] < ' ') ? -swap : swap;
197
/*
  Convert a NUL-terminated string to upper case in place.

  cs   Character set handler; cs->to_upper is the byte mapping table
  str  String to convert; overwritten byte by byte

  Each byte is replaced by its mapped value; the loop stops when the
  mapped value is 0. Returns the number of bytes before that point.
*/
size_t my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
{
  register uchar *map= cs->to_upper;
  char *str_orig= str;
  while ((*str= (char) map[(uchar) *str]) != 0)
    str++;
  return (size_t) (str - str_orig);
}
207
/*
  Convert a NUL-terminated string to lower case in place.

  cs   Character set handler; cs->to_lower is the byte mapping table
  str  String to convert; overwritten byte by byte

  Each byte is replaced by its mapped value; the loop stops when the
  mapped value is 0. Returns the number of bytes before that point.
*/
size_t my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
{
  register uchar *map= cs->to_lower;
  char *str_orig= str;
  while ((*str= (char) map[(uchar) *str]) != 0)
    str++;
  return (size_t) (str - str_orig);
}
217
size_t my_caseup_8bit(CHARSET_INFO * cs, char *src, size_t srclen,
218
char *dst __attribute__((unused)),
219
size_t dstlen __attribute__((unused)))
221
char *end= src + srclen;
222
register uchar *map= cs->to_upper;
223
DBUG_ASSERT(src == dst && srclen == dstlen);
224
for ( ; src != end ; src++)
225
*src= (char) map[(uchar) *src];
230
size_t my_casedn_8bit(CHARSET_INFO * cs, char *src, size_t srclen,
231
char *dst __attribute__((unused)),
232
size_t dstlen __attribute__((unused)))
234
char *end= src + srclen;
235
register uchar *map=cs->to_lower;
236
DBUG_ASSERT(src == dst && srclen == dstlen);
237
for ( ; src != end ; src++)
238
*src= (char) map[(uchar) *src];
242
/*
  Case-insensitive comparison of two NUL-terminated strings.

  cs    Character set handler; bytes are compared through cs->to_upper
  s, t  Strings to compare

  Returns 0 when both strings end while still equal, otherwise the
  difference of the mapped values at the first mismatch.
*/
int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t)
{
  register uchar *map=cs->to_upper;
  while (map[(uchar) *s] == map[(uchar) *t++])
    if (!*s++) return 0;                /* Both ended: equal */
  /* 't' was already advanced in the loop condition, hence t[-1] */
  return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]);
}
251
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
253
const uchar *end __attribute__((unused)))
256
return MY_CS_TOOSMALL;
258
*wc=cs->tab_to_uni[*str];
259
return (!wc[0] && str[0]) ? -1 : 1;
262
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
269
return MY_CS_TOOSMALL;
271
for (idx=cs->tab_from_uni; idx->tab ; idx++)
273
if (idx->from <= wc && idx->to >= wc)
275
str[0]= idx->tab[wc - idx->from];
276
return (!str[0] && wc) ? MY_CS_ILUNI : 1;
284
We can't use vsprintf here as it's not guaranteed to return
285
the length on all operating systems.
286
This function is also not called in a safe environment, so the
287
end buffer must be checked.
290
size_t my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)),
291
char* to, size_t n __attribute__((unused)),
292
const char* fmt, ...)
297
result= my_vsnprintf(to, n, fmt, args);
303
void my_hash_sort_simple(CHARSET_INFO *cs,
304
const uchar *key, size_t len,
305
ulong *nr1, ulong *nr2)
307
register uchar *sort_order=cs->sort_order;
311
Remove end space. We have to do this to be able to compare
312
'A ' and 'A' as identical
314
end= skip_trailing_space(key, len);
316
for (; key < (uchar*) end ; key++)
318
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
319
((uint) sort_order[(uint) *key])) + (nr1[0] << 8);
325
long my_strntol_8bit(CHARSET_INFO *cs,
326
const char *nptr, size_t l, int base,
327
char **endptr, int *err)
330
register uint32 cutoff;
331
register uint cutlim;
333
register const char *s;
335
const char *save, *e;
338
*err= 0; /* Initialize error indicator */
340
if (base < 0 || base == 1 || base > 36)
347
for ( ; s<e && my_isspace(cs, *s) ; s++);
354
/* Check for a sign. */
369
if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
378
if (s[1]=='X' || s[1]=='x')
392
cutoff = ((uint32)~0L) / (uint32) base;
393
cutlim = (uint) (((uint32)~0L) % (uint32) base);
397
for (c = *s; s != e; c = *++s)
399
if (c>='0' && c<='9')
401
else if (c>='A' && c<='Z')
403
else if (c>='a' && c<='z')
409
if (i > cutoff || (i == cutoff && c > cutlim))
422
*endptr = (char *) s;
426
if (i > (uint32) INT_MIN32)
429
else if (i > INT_MAX32)
435
return negative ? INT_MIN32 : INT_MAX32;
438
return (negative ? -((long) i) : (long) i);
443
*endptr = (char *) nptr;
448
ulong my_strntoul_8bit(CHARSET_INFO *cs,
449
const char *nptr, size_t l, int base,
450
char **endptr, int *err)
453
register uint32 cutoff;
454
register uint cutlim;
456
register const char *s;
458
const char *save, *e;
461
*err= 0; /* Initialize error indicator */
463
if (base < 0 || base == 1 || base > 36)
470
for( ; s<e && my_isspace(cs, *s); s++);
491
if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
500
if (s[1]=='X' || s[1]=='x')
514
cutoff = ((uint32)~0L) / (uint32) base;
515
cutlim = (uint) (((uint32)~0L) % (uint32) base);
519
for (c = *s; s != e; c = *++s)
521
if (c>='0' && c<='9')
523
else if (c>='A' && c<='Z')
525
else if (c>='a' && c<='z')
531
if (i > cutoff || (i == cutoff && c > cutlim))
544
*endptr = (char *) s;
549
return (~(uint32) 0);
552
return (negative ? -((long) i) : (long) i);
557
*endptr = (char *) nptr;
562
longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
563
const char *nptr, size_t l, int base,
564
char **endptr,int *err)
567
register ulonglong cutoff;
568
register uint cutlim;
569
register ulonglong i;
570
register const char *s, *e;
574
*err= 0; /* Initialize error indicator */
576
if (base < 0 || base == 1 || base > 36)
583
for(; s<e && my_isspace(cs,*s); s++);
604
if (base == 16 && s[0] == '0' && (s[1]=='X'|| s[1]=='x'))
613
if (s[1]=='X' || s[1]=='x')
628
cutoff = (~(ulonglong) 0) / (unsigned long int) base;
629
cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
635
register uchar c= *s;
636
if (c>='0' && c<='9')
638
else if (c>='A' && c<='Z')
640
else if (c>='a' && c<='z')
646
if (i > cutoff || (i == cutoff && c > cutlim))
650
i *= (ulonglong) base;
659
*endptr = (char *) s;
663
if (i > (ulonglong) LONGLONG_MIN)
666
else if (i > (ulonglong) LONGLONG_MAX)
672
return negative ? LONGLONG_MIN : LONGLONG_MAX;
675
return (negative ? -((longlong) i) : (longlong) i);
680
*endptr = (char *) nptr;
685
ulonglong my_strntoull_8bit(CHARSET_INFO *cs,
686
const char *nptr, size_t l, int base,
687
char **endptr, int *err)
690
register ulonglong cutoff;
691
register uint cutlim;
692
register ulonglong i;
693
register const char *s, *e;
697
*err= 0; /* Initialize error indicator */
699
if (base < 0 || base == 1 || base > 36)
706
for(; s<e && my_isspace(cs,*s); s++);
727
if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
736
if (s[1]=='X' || s[1]=='x')
751
cutoff = (~(ulonglong) 0) / (unsigned long int) base;
752
cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
758
register uchar c= *s;
760
if (c>='0' && c<='9')
762
else if (c>='A' && c<='Z')
764
else if (c>='a' && c<='z')
770
if (i > cutoff || (i == cutoff && c > cutlim))
774
i *= (ulonglong) base;
783
*endptr = (char *) s;
788
return (~(ulonglong) 0);
791
return (negative ? -((longlong) i) : (longlong) i);
796
*endptr = (char *) nptr;
802
Read double from string
806
cs Character set information
807
str String to convert to double
808
length Optional length for string.
809
end result pointer to end of converted string
810
err Error number if failed conversion
813
If length is not INT_MAX32 or str[length] != 0 then the given str must
815
If length == INT_MAX32 the str must be \0 terminated.
817
It's implemented this way to save a buffer allocation and a memory copy.
820
Value of number in string
824
double my_strntod_8bit(CHARSET_INFO *cs __attribute__((unused)),
825
char *str, size_t length,
826
char **end, int *err)
828
if (length == INT_MAX32)
829
length= 65535; /* Should be big enough */
831
return my_strtod(str, end, err);
836
This is a fast version optimized for the case of radix 10 / -10
841
size_t my_long10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
842
char *dst, size_t len, int radix, long int val)
845
register char *p, *e;
848
unsigned long int uval = (unsigned long int) val;
850
e = p = &buffer[sizeof(buffer)-1];
857
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
858
uval= (unsigned long int)0 - uval;
865
new_val = (long) (uval / 10);
866
*--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
872
*--p = '0' + (char) (val-new_val*10);
876
len= min(len, (size_t) (e-p));
882
size_t my_longlong10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
883
char *dst, size_t len, int radix,
887
register char *p, *e;
890
ulonglong uval = (ulonglong)val;
896
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
897
uval = (ulonglong)0 - uval;
904
e = p = &buffer[sizeof(buffer)-1];
914
while (uval > (ulonglong) LONG_MAX)
916
ulonglong quo= uval/(uint) 10;
917
uint rem= (uint) (uval- quo* (uint) 10);
922
long_val= (long) uval;
923
while (long_val != 0)
925
long quo= long_val/10;
926
*--p = (char) ('0' + (long_val - quo*10));
930
len= min(len, (size_t) (e-p));
938
** Compare string against string with wildcard
940
** -1 if not matched with wildcard
941
** 1 if matched with wildcard
944
#ifdef LIKE_CMP_TOUPPER
945
#define likeconv(s,A) (uchar) my_toupper(s,A)
947
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
950
#define INC_PTR(cs,A,B) (A)++
953
int my_wildcmp_8bit(CHARSET_INFO *cs,
954
const char *str,const char *str_end,
955
const char *wildstr,const char *wildend,
956
int escape, int w_one, int w_many)
958
int result= -1; /* Not found, using wildcards */
960
while (wildstr != wildend)
962
while (*wildstr != w_many && *wildstr != w_one)
964
if (*wildstr == escape && wildstr+1 != wildend)
967
if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
968
return(1); /* No match */
969
if (wildstr == wildend)
970
return(str != str_end); /* Match if both are at end */
971
result=1; /* Found an anchor char */
973
if (*wildstr == w_one)
977
if (str == str_end) /* Skip one char if possible */
979
INC_PTR(cs,str,str_end);
980
} while (++wildstr < wildend && *wildstr == w_one);
981
if (wildstr == wildend)
984
if (*wildstr == w_many)
989
/* Remove any '%' and '_' from the wild search string */
990
for (; wildstr != wildend ; wildstr++)
992
if (*wildstr == w_many)
994
if (*wildstr == w_one)
998
INC_PTR(cs,str,str_end);
1001
break; /* Not a wild character */
1003
if (wildstr == wildend)
1004
return(0); /* Ok if w_many is last */
1008
if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
1011
INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
1012
cmp=likeconv(cs,cmp);
1015
while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
1017
if (str++ == str_end) return(-1);
1019
int tmp=my_wildcmp_8bit(cs,str,str_end,wildstr,wildend,escape,w_one,
1024
} while (str != str_end && wildstr[0] != w_many);
1028
return(str != str_end ? 1 : 0);
1033
** Calculate min_str and max_str that ranges a LIKE string.
1035
** ptr Pointer to LIKE string.
1036
** ptr_length Length of LIKE string.
1037
** escape Escape character in LIKE. (Normally '\').
1038
** All escape characters should be removed from min_str and max_str
1039
** res_length Length of min_str and max_str.
1040
** min_str Smallest case sensitive string that ranges LIKE.
1041
** Should be space padded to res_length.
1042
** max_str Largest case sensitive string that ranges LIKE.
1043
** Normally padded with the biggest character sort value.
1045
** The function should return 0 if ok and 1 if the LIKE string can't be
1049
my_bool my_like_range_simple(CHARSET_INFO *cs,
1050
const char *ptr, size_t ptr_length,
1051
pbool escape, pbool w_one, pbool w_many,
1053
char *min_str,char *max_str,
1054
size_t *min_length, size_t *max_length)
1056
const char *end= ptr + ptr_length;
1057
char *min_org=min_str;
1058
char *min_end=min_str+res_length;
1059
size_t charlen= res_length / cs->mbmaxlen;
1061
for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
1063
if (*ptr == escape && ptr+1 != end)
1065
ptr++; /* Skip escape */
1066
*min_str++= *max_str++ = *ptr;
1069
if (*ptr == w_one) /* '_' in SQL */
1071
*min_str++='\0'; /* This should be min char */
1072
*max_str++= (char) cs->max_sort_char;
1075
if (*ptr == w_many) /* '%' in SQL */
1077
/* Calculate length of keys */
1078
*min_length= ((cs->state & MY_CS_BINSORT) ?
1079
(size_t) (min_str - min_org) :
1081
*max_length= res_length;
1085
*max_str++= (char) cs->max_sort_char;
1086
} while (min_str != min_end);
1089
*min_str++= *max_str++ = *ptr;
1092
*min_length= *max_length = (size_t) (min_str - min_org);
1093
while (min_str != min_end)
1094
*min_str++= *max_str++ = ' '; /* Because if key compression */
1099
size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq)
1101
const char *str0= str;
1104
case MY_SEQ_INTTAIL:
1107
for(str++ ; str != end && *str == '0' ; str++);
1108
return (size_t) (str - str0);
1113
for ( ; str < end ; str++)
1115
if (!my_isspace(cs,*str))
1118
return (size_t) (str - str0);
1125
/*
  Fill a buffer with a single byte value.

  cs    Unused
  s     Buffer to fill
  l     Number of bytes to write
  fill  Byte value to store
*/
void my_fill_8bit(CHARSET_INFO *cs __attribute__((unused)),
                  char *s, size_t l, int fill)
{
  bfill((uchar*) s,l,fill);
}
1132
/*
  Number of characters in the range [b, e).

  Computed as the byte distance between the two pointers.
*/
size_t my_numchars_8bit(CHARSET_INFO *cs __attribute__((unused)),
                        const char *b, const char *e)
{
  return (size_t) (e - b);
}
1139
/*
  Number of display cells occupied by the range [b, e).

  Computed as the byte distance between the two pointers.
*/
size_t my_numcells_8bit(CHARSET_INFO *cs __attribute__((unused)),
                        const char *b, const char *e)
{
  return (size_t) (e - b);
}
1146
size_t my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)),
1147
const char *b __attribute__((unused)),
1148
const char *e __attribute__((unused)),
1155
size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
1156
const char *start, const char *end,
1157
size_t nchars, int *error)
1159
size_t nbytes= (size_t) (end-start);
1161
return min(nbytes, nchars);
1165
/*
  Length of a string with its trailing part stripped.

  cs      Unused
  ptr     Start of the string
  length  Length of the string in bytes

  Returns the distance from 'ptr' to the position reported by
  skip_trailing_space() (defined elsewhere; presumably the byte after
  the last non-space character).
*/
size_t my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
                        const char *ptr, size_t length)
{
  const char *end;
  end= (const char *) skip_trailing_space((const uchar *)ptr, length);
  return (size_t) (end-ptr);
}
1174
uint my_instr_simple(CHARSET_INFO *cs,
1175
const char *b, size_t b_length,
1176
const char *s, size_t s_length,
1177
my_match_t *match, uint nmatch)
1179
register const uchar *str, *search, *end, *search_end;
1181
if (s_length <= b_length)
1191
return 1; /* Empty string is always found */
1194
str= (const uchar*) b;
1195
search= (const uchar*) s;
1196
end= (const uchar*) b+b_length-s_length+1;
1197
search_end= (const uchar*) s + s_length;
1202
if (cs->sort_order[*str++] == cs->sort_order[*search])
1204
register const uchar *i,*j;
1209
while (j != search_end)
1210
if (cs->sort_order[*i++] != cs->sort_order[*j++])
1216
match[0].end= (size_t) (str- (const uchar*)b-1);
1217
match[0].mb_len= match[0].end;
1221
match[1].beg= match[0].end;
1222
match[1].end= match[0].end+s_length;
1223
match[1].mb_len= match[1].end-match[1].beg;
1240
#define PLANE_SIZE 0x100
1241
#define PLANE_NUM 0x100
1242
#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM)
1244
static int pcmp(const void * f, const void * s)
1246
const uni_idx *F= (const uni_idx*) f;
1247
const uni_idx *S= (const uni_idx*) s;
1250
if (!(res=((S->nchars)-(F->nchars))))
1251
res=((F->uidx.from)-(S->uidx.to));
1255
static my_bool create_fromuni(CHARSET_INFO *cs, void *(*alloc)(size_t))
1257
uni_idx idx[PLANE_NUM];
1261
Check that Unicode map is loaded.
1262
It can be not loaded when the collation is
1263
listed in Index.xml but not specified
1264
in the character set specific XML file.
1266
if (!cs->tab_to_uni)
1269
/* Clear plane statistics */
1270
bzero(idx,sizeof(idx));
1272
/* Count number of characters in each plane */
1273
for (i=0; i< 0x100; i++)
1275
uint16 wc=cs->tab_to_uni[i];
1276
int pl= PLANE_NUMBER(wc);
1280
if (!idx[pl].nchars)
1282
idx[pl].uidx.from=wc;
1286
idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
1287
idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
1293
/* Sort planes in descending order */
1294
qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
1296
for (i=0; i < PLANE_NUM; i++)
1300
/* Skip empty plane */
1304
numchars=idx[i].uidx.to-idx[i].uidx.from+1;
1305
if (!(idx[i].uidx.tab=(uchar*) alloc(numchars * sizeof(*idx[i].uidx.tab))))
1308
bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));
1310
for (ch=1; ch < PLANE_SIZE; ch++)
1312
uint16 wc=cs->tab_to_uni[ch];
1313
if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
1315
int ofs= wc - idx[i].uidx.from;
1316
idx[i].uidx.tab[ofs]= ch;
1321
/* Allocate and fill reverse table for each plane */
1323
if (!(cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX)*(n+1))))
1326
for (i=0; i< n; i++)
1327
cs->tab_from_uni[i]= idx[i].uidx;
1329
/* Set end-of-list marker */
1330
bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
1334
static my_bool my_cset_init_8bit(CHARSET_INFO *cs, void *(*alloc)(size_t))
1336
cs->caseup_multiply= 1;
1337
cs->casedn_multiply= 1;
1339
return create_fromuni(cs, alloc);
1342
/*
  Record in cs->max_sort_char the byte with the largest sort weight.

  Does nothing when the collation has no sort_order table. The scan
  starts from the weight of the current cs->max_sort_char and uses a
  strict comparison, so among equal weights the earliest byte found
  (or the existing value) is kept.
*/
static void set_max_sort_char(CHARSET_INFO *cs)
{
  uchar max_char;
  uint  i;

  if (!cs->sort_order)
    return;

  max_char=cs->sort_order[(uchar) cs->max_sort_char];
  for (i= 0; i < 256; i++)
  {
    if ((uchar) cs->sort_order[i] > max_char)
    {
      max_char=(uchar) cs->sort_order[i];
      cs->max_sort_char= i;
    }
  }
}
1361
static my_bool my_coll_init_simple(CHARSET_INFO *cs,
1362
void *(*alloc)(size_t) __attribute__((unused)))
1364
set_max_sort_char(cs);
1369
/*
  8-bit character set entry point for decimal string to longlong
  conversion.

  cs      Unused
  nptr    String to convert
  endptr  Passed through to my_strtoll10()
  error   Passed through to my_strtoll10()

  Forwards all arguments to my_strtoll10() and returns its result.
*/
longlong my_strtoll10_8bit(CHARSET_INFO *cs __attribute__((unused)),
                           const char *nptr, char **endptr, int *error)
{
  return my_strtoll10(nptr, endptr, error);
}
1376
int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
1377
const uchar *s, const uchar *e)
1382
return MY_CS_TOOSMALL;
1384
*ctype= cs->ctype[*s + 1];
1389
#undef ULONGLONG_MAX
1391
Needed under MetroWerks Compiler, since MetroWerks compiler does not
1392
properly handle a constant expression containing a mod operator
1394
#if defined(__NETWARE__) && defined(__MWERKS__)
1395
static ulonglong ulonglong_max= ~(ulonglong) 0;
1396
#define ULONGLONG_MAX ulonglong_max
1398
#define ULONGLONG_MAX (~(ulonglong) 0)
1399
#endif /* __NETWARE__ && __MWERKS__ */
1402
#define CUTOFF (ULONGLONG_MAX / 10)
1403
#define CUTLIM (ULONGLONG_MAX % 10)
1404
#define DIGITS_IN_ULONGLONG 20
1406
static ulonglong d10[DIGITS_IN_ULONGLONG]=
1423
1000000000000000ULL,
1424
10000000000000000ULL,
1425
100000000000000000ULL,
1426
1000000000000000000ULL,
1427
10000000000000000000ULL
1433
Convert a string to unsigned long long integer value
1437
my_strntoull10rnd_8bit()
1438
cs in pointer to character set
1439
str in pointer to the string to be converted
1440
length in string length
1441
unsigned_flag in whether the number is unsigned
1442
endptr out pointer to the stop character
1443
error out returned error code
1446
This function takes the decimal representation of integer number
1447
from string str and converts it to a signed or unsigned
1448
long long integer value.
1449
Space characters and tab are ignored.
1450
A sign character might precede the digit characters.
1451
The number may have any number of pre-zero digits.
1452
The number may have decimal point and exponent.
1453
Rounding is always done in "away from zero" style:
1457
The function stops reading the string str after "length" bytes
1458
or at the first character that is not a part of correct number syntax:
1460
<signed numeric literal> ::=
1461
[ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
1463
<exact numeric literal> ::=
1464
<unsigned integer> [ <period> [ <unsigned integer> ] ]
1465
| <period> <unsigned integer>
1466
<unsigned integer> ::= <digit>...
1469
Value of string as a signed/unsigned longlong integer
1471
endptr cannot be NULL. The function will store the end pointer
1472
to the stop character here.
1474
The error parameter contains information how things went:
1476
ERANGE If the value of the converted number is out of range
1477
In this case the return value is:
1478
- ULONGLONG_MAX if unsigned_flag and the number was too big
1479
- 0 if unsigned_flag and the number was negative
1480
- LONGLONG_MAX if no unsigned_flag and the number is too big
1481
- LONGLONG_MIN if no unsigned_flag and the number is too big negative
1483
EDOM If the string didn't contain any digits.
1484
In this case the return value is 0.
1488
my_strntoull10rnd_8bit(CHARSET_INFO *cs __attribute__((unused)),
1489
const char *str, size_t length, int unsigned_flag,
1490
char **endptr, int *error)
1492
const char *dot, *end9, *beg, *end= str + length;
1496
int shift= 0, digits= 0, negative, addon;
1498
/* Skip leading spaces and tabs */
1499
for ( ; str < end && (*str == ' ' || *str == '\t') ; str++);
1504
if ((negative= (*str == '-')) || *str=='+') /* optional sign */
1511
end9= (str + 9) > end ? end : (str + 9);
1512
/* Accumulate small number into ulong, for performance purposes */
1513
for (ul= 0 ; str < end9 && (ch= (uchar) (*str - '0')) < 10; str++)
1518
if (str >= end) /* Small number without dots and expanents */
1520
*endptr= (char*) str;
1525
*error= ul ? MY_ERRNO_ERANGE : 0;
1531
return (ulonglong) (longlong) -(long) ul;
1537
return (ulonglong) ul;
1543
/* Continue to accumulate into ulonglong */
1544
for (dot= NULL, ull= ul; str < end; str++)
1546
if ((ch= (uchar) (*str - '0')) < 10)
1548
if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
1555
Adding the next digit would overflow.
1556
Remember the next digit in "addon", for rounding.
1557
Scan all digits with an optional single dot.
1566
addon= (*str >= '5');
1569
for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; shift++, str++);
1570
if (str < end && *str == '.')
1573
for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1579
for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1588
/* The second dot character */
1599
/* Unknown character, exit the loop */
1602
shift= dot ? dot - str : 0; /* Right shift */
1605
exp: /* [ E [ <sign> ] <unsigned integer> ] */
1613
if (str < end && (*str == 'e' || *str == 'E'))
1618
int negative_exp, exponent;
1619
if ((negative_exp= (*str == '-')) || *str=='+')
1625
str < end && (ch= (uchar) (*str - '0')) < 10;
1628
exponent= exponent * 10 + ch;
1630
shift+= negative_exp ? -exponent : exponent;
1634
if (shift == 0) /* No shift, check addon digit */
1638
if (ull == ULONGLONG_MAX)
1645
if (shift < 0) /* Right shift */
1649
if (-shift >= DIGITS_IN_ULONGLONG)
1650
goto ret_zero; /* Exponent is a big negative number, return 0 */
1660
if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
1667
for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
1670
goto ret_too_big; /* Overflow, number too big */
1674
*endptr= (char*) str;
1680
if (ull > (ulonglong) LONGLONG_MIN)
1682
*error= MY_ERRNO_ERANGE;
1683
return (ulonglong) LONGLONG_MIN;
1686
return (ulonglong) -(longlong) ull;
1690
if (ull > (ulonglong) LONGLONG_MAX)
1692
*error= MY_ERRNO_ERANGE;
1693
return (ulonglong) LONGLONG_MAX;
1700
/* Unsigned number */
1701
if (negative && ull)
1703
*error= MY_ERRNO_ERANGE;
1710
*endptr= (char*) str;
1715
*endptr= (char*) str;
1716
*error= MY_ERRNO_EDOM;
1720
*endptr= (char*) str;
1721
*error= MY_ERRNO_ERANGE;
1722
return unsigned_flag ?
1724
negative ? (ulonglong) LONGLONG_MIN : (ulonglong) LONGLONG_MAX;
1729
Check if a constant can be propagated
1732
my_propagate_simple()
1733
cs Character set information
1734
str String to convert to double
1735
length Optional length for string.
1738
Takes the string in the given charset and checks
1739
if it can be safely propagated in the optimizer.
1742
s char(5) character set latin1 collate latin1_german2_ci);
1743
insert into t1 values (0xf6); -- o-umlaut
1744
select * from t1 where length(s)=1 and s='oe';
1746
The above query should return one row.
1747
We cannot convert this query into:
1748
select * from t1 where length('oe')=1 and s='oe';
1750
Currently we don't check the constant itself,
1751
and decide not to propagate a constant
1752
just if the collation itself allows tricky things
1753
like expansions and contractions. In the future
1754
we can write a more sophisticated functions to
1755
check the constants. For example, 'oa' can always
1756
be safely propagated in German2 because unlike
1757
'oe' it does not have any special meaning.
1760
1 if constant can be safely propagated
1761
0 if it is not safe to propagate the constant
1766
my_bool my_propagate_simple(CHARSET_INFO *cs __attribute__((unused)),
1767
const uchar *str __attribute__((unused)),
1768
size_t length __attribute__((unused)))
1774
my_bool my_propagate_complex(CHARSET_INFO *cs __attribute__((unused)),
1775
const uchar *str __attribute__((unused)),
1776
size_t length __attribute__((unused)))
1784
Normalize strxfrm flags
1787
my_strxfrm_flag_normalize()
1788
flags - non-normalized flags
1789
maximum - maximum number of levels
1792
If levels are omitted, then 1-maximum is assumed.
1793
If any level number is greater than the maximum,
1794
it is treated as the maximum.
1800
uint my_strxfrm_flag_normalize(uint flags, uint maximum)
1802
DBUG_ASSERT(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS);
1804
/* If levels are omitted, then 1-maximum is assumed*/
1805
if (!(flags & MY_STRXFRM_LEVEL_ALL))
1807
static uint def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F };
1808
uint flag_pad= flags & MY_STRXFRM_PAD_WITH_SPACE;
1809
flags= def_level_flags[maximum] | flag_pad;
1814
uint flag_lev= flags & MY_STRXFRM_LEVEL_ALL;
1815
uint flag_dsc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1816
uint flag_rev= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1817
uint flag_pad= flags & MY_STRXFRM_PAD_WITH_SPACE;
1820
If any level number is greater than the maximum,
1821
it is treated as the maximum.
1823
for (maximum--, flags= 0, i= 0; i < MY_STRXFRM_NLEVELS; i++)
1825
uint src_bit= 1 << i;
1826
uint dst_bit= 1 << min(i, maximum);
1827
if (flag_lev & src_bit)
1830
flags|= (flag_dsc & dst_bit) << MY_STRXFRM_DESC_SHIFT;
1831
flags|= (flag_rev & dst_bit) << MY_STRXFRM_REVERSE_SHIFT;
1835
/* Check that there are no DESC or REVERSE flag for skipped level */
1836
DBUG_ASSERT(!(flag_dsc & src_bit) && !(flag_rev & src_bit));
1846
Apply DESC and REVERSE collation rules.
1849
my_strxfrm_desc_and_reverse()
1850
str - pointer to string
1851
strend - end of string
1853
level - which level, starting from 0.
1856
Apply DESC or REVERSE or both flags.
1858
If DESC flag is given, then the weights
1859
come out NOTed or negated for that level.
1861
If REVERSE flag is given, then the weights come out in
1862
reverse order for that level, that is, starting with
1863
the last character and ending with the first character.
1865
If neither DESC nor REVERSE flags are given,
1866
the string is not changed.
1869
/*
  Apply the DESC and/or REVERSE flags of one level to a weight string,
  in place.

  str     Start of the weights
  strend  End of the weights (one past the last byte)
  flags   Flag word; the DESC and REVERSE bits for 'level' are tested
  level   Level number, starting from 0

  DESC complements every byte, REVERSE swaps the bytes end for end, and
  both together do the two in a single pass. With neither flag set the
  string is left unchanged.
*/
void my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
                                 uint flags, uint level)
{
  /* Nothing to do for an empty range; also keeps strend-- in bounds */
  if (str >= strend)
    return;

  if (flags & (MY_STRXFRM_DESC_LEVEL1 << level))
  {
    if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
    {
      /* "<=" so that the middle byte of an odd length is complemented */
      for (strend--; str <= strend;)
      {
        uchar tmp= *str;
        *str++= ~*strend;
        *strend--= ~tmp;
      }
    }
    else
    {
      for (; str < strend; str++)
        *str= ~*str;
    }
  }
  else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
  {
    for (strend--; str < strend;)
    {
      uchar tmp= *str;
      *str++= *strend;
      *strend--= tmp;
    }
  }
}
1902
my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
1903
uchar *str, uchar *frmend, uchar *strend,
1904
uint nweights, uint flags, uint level)
1906
if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
1908
uint fill_length= min((uint) (strend - frmend), nweights * cs->mbminlen);
1909
cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
1910
frmend+= fill_length;
1912
my_strxfrm_desc_and_reverse(str, frmend, flags, level);
1913
return frmend - str;
1917
MY_CHARSET_HANDLER my_charset_8bit_handler=
1920
NULL, /* ismbchar */
1921
my_mbcharlen_8bit, /* mbcharlen */
1924
my_well_formed_len_8bit,
1935
my_long10_to_str_8bit,
1936
my_longlong10_to_str_8bit,
1944
my_strntoull10rnd_8bit,
1948
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
1950
my_coll_init_simple, /* init */
1951
my_strnncoll_simple,
1952
my_strnncollsp_simple,
1954
my_strnxfrmlen_simple,
1955
my_like_range_simple,
1959
my_hash_sort_simple,