1
/* Copyright (C) 2002 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
#include <my_global.h>
19
#include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */
25
Returns the number of bytes required for strnxfrm().
28
size_t my_strnxfrmlen_simple(CHARSET_INFO *cs, size_t len)
30
return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
35
Converts a string into its sort key.
42
The my_strxfrm_xxx() function transforms a string pointed to by
43
'src' with length 'srclen' according to the charset+collation
44
pair 'cs' and copies the result key into 'dest'.
46
Comparing two strings using memcmp() after my_strnxfrm_xxx()
47
is equal to comparing two original strings with my_strnncollsp_xxx().
49
Not more than 'dstlen' bytes are written into 'dst'.
50
To guarantee that the whole string is transformed, 'dstlen' must be
51
at least srclen*cs->strxfrm_multiply bytes long. Otherwise,
52
a subsequent memcmp() may return an inaccurate result.
54
If the source string is too short to fill whole 'dstlen' bytes,
55
then the 'dest' string is padded up to 'dstlen', ensuring that:
61
my_strnxfrm_simple() is implemented for 8bit charsets and
62
simple collations with one-to-one string->key transformation.
64
See also implementations for various charsets/collations in
65
other ctype-xxx.c files.
74
size_t my_strnxfrm_simple(CHARSET_INFO * cs,
75
uchar *dest, size_t len,
76
const uchar *src, size_t srclen)
78
uchar *map= cs->sort_order;
80
set_if_smaller(len, srclen);
84
for ( end=src+len; src < end ; )
90
for ( end=dest+len; dest < end ; dest++)
91
*dest= (char) map[(uchar) *dest];
94
bfill(dest, dstlen - len, ' ');
99
int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen,
100
const uchar *t, size_t tlen,
103
size_t len = ( slen > tlen ) ? tlen : slen;
104
uchar *map= cs->sort_order;
105
if (t_is_prefix && slen > tlen)
109
if (map[*s++] != map[*t++])
110
return ((int) map[s[-1]] - (int) map[t[-1]]);
113
We can't use (slen - tlen) here as the result may be outside of the
114
precision of a signed int
116
return slen > tlen ? 1 : slen < tlen ? -1 : 0 ;
121
Compare strings, discarding end space
124
my_strnncollsp_simple()
125
cs character set handler
126
a First string to compare
127
a_length Length of 'a'
128
b Second string to compare
129
b_length Length of 'b'
130
diff_if_only_endspace_difference
131
Set to 1 if the strings should be regarded as different
132
if they only differ in end space
135
If one string is shorter than the other, then we space-extend the shorter one
136
so that the strings have equal length.
138
This will ensure that the following things hold:
150
int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
151
const uchar *b, size_t b_length,
152
my_bool diff_if_only_endspace_difference)
154
const uchar *map= cs->sort_order, *end;
158
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
159
diff_if_only_endspace_difference= 0;
162
end= a + (length= min(a_length, b_length));
165
if (map[*a++] != map[*b++])
166
return ((int) map[a[-1]] - (int) map[b[-1]]);
169
if (a_length != b_length)
172
if (diff_if_only_endspace_difference)
173
res= 1; /* Assume 'a' is bigger */
175
Check the next not space character of the longer key. If it's < ' ',
176
then it's smaller than the other key.
178
if (a_length < b_length)
180
/* put shorter key in s */
183
swap= -1; /* swap sign of result */
186
for (end= a + a_length-length; a < end ; a++)
188
if (map[*a] != map[' '])
189
return (map[*a] < map[' ']) ? -swap : swap;
196
size_t my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
198
register uchar *map= cs->to_upper;
200
while ((*str= (char) map[(uchar) *str]) != 0)
202
return (size_t) (str - str_orig);
206
size_t my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
208
register uchar *map= cs->to_lower;
210
while ((*str= (char) map[(uchar) *str]) != 0)
212
return (size_t) (str - str_orig);
216
size_t my_caseup_8bit(CHARSET_INFO * cs, char *src, size_t srclen,
217
char *dst __attribute__((unused)),
218
size_t dstlen __attribute__((unused)))
220
char *end= src + srclen;
221
register uchar *map= cs->to_upper;
222
DBUG_ASSERT(src == dst && srclen == dstlen);
223
for ( ; src != end ; src++)
224
*src= (char) map[(uchar) *src];
229
size_t my_casedn_8bit(CHARSET_INFO * cs, char *src, size_t srclen,
230
char *dst __attribute__((unused)),
231
size_t dstlen __attribute__((unused)))
233
char *end= src + srclen;
234
register uchar *map=cs->to_lower;
235
DBUG_ASSERT(src == dst && srclen == dstlen);
236
for ( ; src != end ; src++)
237
*src= (char) map[(uchar) *src];
241
int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t)
243
register uchar *map=cs->to_upper;
244
while (map[(uchar) *s] == map[(uchar) *t++])
246
return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]);
250
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
252
const uchar *end __attribute__((unused)))
255
return MY_CS_TOOSMALL;
257
*wc=cs->tab_to_uni[*str];
258
return (!wc[0] && str[0]) ? -1 : 1;
261
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
268
return MY_CS_TOOSMALL;
270
for (idx=cs->tab_from_uni; idx->tab ; idx++)
272
if (idx->from <= wc && idx->to >= wc)
274
str[0]= idx->tab[wc - idx->from];
275
return (!str[0] && wc) ? MY_CS_ILUNI : 1;
283
We can't use vsprintf here as it's not guaranteed to return
284
the length on all operating systems.
285
This function is also not called in a safe environment, so the
286
end buffer must be checked.
289
size_t my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)),
290
char* to, size_t n __attribute__((unused)),
291
const char* fmt, ...)
296
result= my_vsnprintf(to, n, fmt, args);
302
void my_hash_sort_simple(CHARSET_INFO *cs,
303
const uchar *key, size_t len,
304
ulong *nr1, ulong *nr2)
306
register uchar *sort_order=cs->sort_order;
307
const uchar *end= key + len;
310
Remove end space. We have to do this to be able to compare
311
'A ' and 'A' as identical
313
while (end > key && end[-1] == ' ')
316
for (; key < (uchar*) end ; key++)
318
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
319
((uint) sort_order[(uint) *key])) + (nr1[0] << 8);
325
long my_strntol_8bit(CHARSET_INFO *cs,
326
const char *nptr, size_t l, int base,
327
char **endptr, int *err)
330
register uint32 cutoff;
331
register uint cutlim;
333
register const char *s;
335
const char *save, *e;
338
*err= 0; /* Initialize error indicator */
340
if (base < 0 || base == 1 || base > 36)
347
for ( ; s<e && my_isspace(cs, *s) ; s++);
354
/* Check for a sign. */
369
if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
378
if (s[1]=='X' || s[1]=='x')
392
cutoff = ((uint32)~0L) / (uint32) base;
393
cutlim = (uint) (((uint32)~0L) % (uint32) base);
397
for (c = *s; s != e; c = *++s)
399
if (c>='0' && c<='9')
401
else if (c>='A' && c<='Z')
403
else if (c>='a' && c<='z')
409
if (i > cutoff || (i == cutoff && c > cutlim))
422
*endptr = (char *) s;
426
if (i > (uint32) INT_MIN32)
429
else if (i > INT_MAX32)
435
return negative ? INT_MIN32 : INT_MAX32;
438
return (negative ? -((long) i) : (long) i);
443
*endptr = (char *) nptr;
448
ulong my_strntoul_8bit(CHARSET_INFO *cs,
449
const char *nptr, size_t l, int base,
450
char **endptr, int *err)
453
register uint32 cutoff;
454
register uint cutlim;
456
register const char *s;
458
const char *save, *e;
461
*err= 0; /* Initialize error indicator */
463
if (base < 0 || base == 1 || base > 36)
470
for( ; s<e && my_isspace(cs, *s); s++);
491
if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
500
if (s[1]=='X' || s[1]=='x')
514
cutoff = ((uint32)~0L) / (uint32) base;
515
cutlim = (uint) (((uint32)~0L) % (uint32) base);
519
for (c = *s; s != e; c = *++s)
521
if (c>='0' && c<='9')
523
else if (c>='A' && c<='Z')
525
else if (c>='a' && c<='z')
531
if (i > cutoff || (i == cutoff && c > cutlim))
544
*endptr = (char *) s;
549
return (~(uint32) 0);
552
return (negative ? -((long) i) : (long) i);
557
*endptr = (char *) nptr;
562
longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
563
const char *nptr, size_t l, int base,
564
char **endptr,int *err)
567
register ulonglong cutoff;
568
register uint cutlim;
569
register ulonglong i;
570
register const char *s, *e;
574
*err= 0; /* Initialize error indicator */
576
if (base < 0 || base == 1 || base > 36)
583
for(; s<e && my_isspace(cs,*s); s++);
604
if (base == 16 && s[0] == '0' && (s[1]=='X'|| s[1]=='x'))
613
if (s[1]=='X' || s[1]=='x')
628
cutoff = (~(ulonglong) 0) / (unsigned long int) base;
629
cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
635
register uchar c= *s;
636
if (c>='0' && c<='9')
638
else if (c>='A' && c<='Z')
640
else if (c>='a' && c<='z')
646
if (i > cutoff || (i == cutoff && c > cutlim))
650
i *= (ulonglong) base;
659
*endptr = (char *) s;
663
if (i > (ulonglong) LONGLONG_MIN)
666
else if (i > (ulonglong) LONGLONG_MAX)
672
return negative ? LONGLONG_MIN : LONGLONG_MAX;
675
return (negative ? -((longlong) i) : (longlong) i);
680
*endptr = (char *) nptr;
685
ulonglong my_strntoull_8bit(CHARSET_INFO *cs,
686
const char *nptr, size_t l, int base,
687
char **endptr, int *err)
690
register ulonglong cutoff;
691
register uint cutlim;
692
register ulonglong i;
693
register const char *s, *e;
697
*err= 0; /* Initialize error indicator */
699
if (base < 0 || base == 1 || base > 36)
706
for(; s<e && my_isspace(cs,*s); s++);
727
if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
736
if (s[1]=='X' || s[1]=='x')
751
cutoff = (~(ulonglong) 0) / (unsigned long int) base;
752
cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
758
register uchar c= *s;
760
if (c>='0' && c<='9')
762
else if (c>='A' && c<='Z')
764
else if (c>='a' && c<='z')
770
if (i > cutoff || (i == cutoff && c > cutlim))
774
i *= (ulonglong) base;
783
*endptr = (char *) s;
788
return (~(ulonglong) 0);
791
return (negative ? -((longlong) i) : (longlong) i);
796
*endptr = (char *) nptr;
802
Read double from string
806
cs Character set information
807
str String to convert to double
808
length Optional length for string.
809
end result pointer to end of converted string
810
err Error number if failed conversion
813
If length is not INT_MAX32 or str[length] != 0 then the given str must
815
If length == INT_MAX32 the str must be \0 terminated.
817
It's implemented this way to save a buffer allocation and a memory copy.
820
Value of number in string
824
double my_strntod_8bit(CHARSET_INFO *cs __attribute__((unused)),
825
char *str, size_t length,
826
char **end, int *err)
828
if (length == INT_MAX32)
829
length= 65535; /* Should be big enough */
831
return my_strtod(str, end, err);
836
This is a fast version optimized for the case of radix 10 / -10
841
size_t my_long10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
842
char *dst, size_t len, int radix, long int val)
845
register char *p, *e;
848
unsigned long int uval = (unsigned long int) val;
850
e = p = &buffer[sizeof(buffer)-1];
857
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
858
uval= (unsigned long int)0 - uval;
865
new_val = (long) (uval / 10);
866
*--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
872
*--p = '0' + (char) (val-new_val*10);
876
len= min(len, (size_t) (e-p));
882
size_t my_longlong10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
883
char *dst, size_t len, int radix,
887
register char *p, *e;
890
ulonglong uval = (ulonglong)val;
896
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
897
uval = (ulonglong)0 - uval;
904
e = p = &buffer[sizeof(buffer)-1];
914
while (uval > (ulonglong) LONG_MAX)
916
ulonglong quo= uval/(uint) 10;
917
uint rem= (uint) (uval- quo* (uint) 10);
922
long_val= (long) uval;
923
while (long_val != 0)
925
long quo= long_val/10;
926
*--p = (char) ('0' + (long_val - quo*10));
930
len= min(len, (size_t) (e-p));
938
** Compare string against string with wildcard
940
** -1 if not matched with wildcard
941
** 1 if matched with wildcard
944
#ifdef LIKE_CMP_TOUPPER
945
#define likeconv(s,A) (uchar) my_toupper(s,A)
947
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
950
#define INC_PTR(cs,A,B) (A)++
953
int my_wildcmp_8bit(CHARSET_INFO *cs,
954
const char *str,const char *str_end,
955
const char *wildstr,const char *wildend,
956
int escape, int w_one, int w_many)
958
int result= -1; /* Not found, using wildcards */
960
while (wildstr != wildend)
962
while (*wildstr != w_many && *wildstr != w_one)
964
if (*wildstr == escape && wildstr+1 != wildend)
967
if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
968
return(1); /* No match */
969
if (wildstr == wildend)
970
return(str != str_end); /* Match if both are at end */
971
result=1; /* Found an anchor char */
973
if (*wildstr == w_one)
977
if (str == str_end) /* Skip one char if possible */
979
INC_PTR(cs,str,str_end);
980
} while (++wildstr < wildend && *wildstr == w_one);
981
if (wildstr == wildend)
984
if (*wildstr == w_many)
989
/* Remove any '%' and '_' from the wild search string */
990
for (; wildstr != wildend ; wildstr++)
992
if (*wildstr == w_many)
994
if (*wildstr == w_one)
998
INC_PTR(cs,str,str_end);
1001
break; /* Not a wild character */
1003
if (wildstr == wildend)
1004
return(0); /* Ok if w_many is last */
1008
if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
1011
INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
1012
cmp=likeconv(cs,cmp);
1015
while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
1017
if (str++ == str_end) return(-1);
1019
int tmp=my_wildcmp_8bit(cs,str,str_end,wildstr,wildend,escape,w_one,
1024
} while (str != str_end && wildstr[0] != w_many);
1028
return(str != str_end ? 1 : 0);
1033
** Calculate min_str and max_str that ranges a LIKE string.
1035
** ptr Pointer to LIKE string.
1036
** ptr_length Length of LIKE string.
1037
** escape Escape character in LIKE. (Normally '\').
1038
** All escape characters should be removed from min_str and max_str
1039
** res_length Length of min_str and max_str.
1040
** min_str Smallest case sensitive string that ranges LIKE.
1041
** Should be space padded to res_length.
1042
** max_str Largest case sensitive string that ranges LIKE.
1043
** Normally padded with the biggest character sort value.
1045
** The function should return 0 if ok and 1 if the LIKE string can't be
1049
my_bool my_like_range_simple(CHARSET_INFO *cs,
1050
const char *ptr, size_t ptr_length,
1051
pbool escape, pbool w_one, pbool w_many,
1053
char *min_str,char *max_str,
1054
size_t *min_length, size_t *max_length)
1056
const char *end= ptr + ptr_length;
1057
char *min_org=min_str;
1058
char *min_end=min_str+res_length;
1059
size_t charlen= res_length / cs->mbmaxlen;
1061
for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
1063
if (*ptr == escape && ptr+1 != end)
1065
ptr++; /* Skip escape */
1066
*min_str++= *max_str++ = *ptr;
1069
if (*ptr == w_one) /* '_' in SQL */
1071
*min_str++='\0'; /* This should be min char */
1072
*max_str++= (char) cs->max_sort_char;
1075
if (*ptr == w_many) /* '%' in SQL */
1077
/* Calculate length of keys */
1078
*min_length= ((cs->state & MY_CS_BINSORT) ?
1079
(size_t) (min_str - min_org) :
1081
*max_length= res_length;
1085
*max_str++= (char) cs->max_sort_char;
1086
} while (min_str != min_end);
1089
*min_str++= *max_str++ = *ptr;
1092
*min_length= *max_length = (size_t) (min_str - min_org);
1093
while (min_str != min_end)
1094
*min_str++= *max_str++ = ' '; /* Because if key compression */
1099
size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq)
1101
const char *str0= str;
1104
case MY_SEQ_INTTAIL:
1107
for(str++ ; str != end && *str == '0' ; str++);
1108
return (size_t) (str - str0);
1113
for ( ; str < end ; str++)
1115
if (!my_isspace(cs,*str))
1118
return (size_t) (str - str0);
1125
void my_fill_8bit(CHARSET_INFO *cs __attribute__((unused)),
1126
char *s, size_t l, int fill)
1128
bfill((uchar*) s,l,fill);
1132
size_t my_numchars_8bit(CHARSET_INFO *cs __attribute__((unused)),
1133
const char *b, const char *e)
1135
return (size_t) (e - b);
1139
size_t my_numcells_8bit(CHARSET_INFO *cs __attribute__((unused)),
1140
const char *b, const char *e)
1142
return (size_t) (e - b);
1146
size_t my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)),
1147
const char *b __attribute__((unused)),
1148
const char *e __attribute__((unused)),
1155
size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
1156
const char *start, const char *end,
1157
size_t nchars, int *error)
1159
size_t nbytes= (size_t) (end-start);
1161
return min(nbytes, nchars);
1165
size_t my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
1166
const char *ptr, size_t length)
1168
const char *end= ptr+length;
1169
while (end > ptr && end[-1] == ' ')
1171
return (size_t) (end-ptr);
1175
uint my_instr_simple(CHARSET_INFO *cs,
1176
const char *b, size_t b_length,
1177
const char *s, size_t s_length,
1178
my_match_t *match, uint nmatch)
1180
register const uchar *str, *search, *end, *search_end;
1182
if (s_length <= b_length)
1192
return 1; /* Empty string is always found */
1195
str= (const uchar*) b;
1196
search= (const uchar*) s;
1197
end= (const uchar*) b+b_length-s_length+1;
1198
search_end= (const uchar*) s + s_length;
1203
if (cs->sort_order[*str++] == cs->sort_order[*search])
1205
register const uchar *i,*j;
1210
while (j != search_end)
1211
if (cs->sort_order[*i++] != cs->sort_order[*j++])
1217
match[0].end= (size_t) (str- (const uchar*)b-1);
1218
match[0].mb_len= match[0].end;
1222
match[1].beg= match[0].end;
1223
match[1].end= match[0].end+s_length;
1224
match[1].mb_len= match[1].end-match[1].beg;
1241
#define PLANE_SIZE 0x100
1242
#define PLANE_NUM 0x100
1243
#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM)
1245
static int pcmp(const void * f, const void * s)
1247
const uni_idx *F= (const uni_idx*) f;
1248
const uni_idx *S= (const uni_idx*) s;
1251
if (!(res=((S->nchars)-(F->nchars))))
1252
res=((F->uidx.from)-(S->uidx.to));
1256
static my_bool create_fromuni(CHARSET_INFO *cs, void *(*alloc)(size_t))
1258
uni_idx idx[PLANE_NUM];
1262
Check that Unicode map is loaded.
1263
It can be not loaded when the collation is
1264
listed in Index.xml but not specified
1265
in the character set specific XML file.
1267
if (!cs->tab_to_uni)
1270
/* Clear plane statistics */
1271
bzero(idx,sizeof(idx));
1273
/* Count number of characters in each plane */
1274
for (i=0; i< 0x100; i++)
1276
uint16 wc=cs->tab_to_uni[i];
1277
int pl= PLANE_NUMBER(wc);
1281
if (!idx[pl].nchars)
1283
idx[pl].uidx.from=wc;
1287
idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
1288
idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
1294
/* Sort planes in descending order */
1295
qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
1297
for (i=0; i < PLANE_NUM; i++)
1301
/* Skip empty plane */
1305
numchars=idx[i].uidx.to-idx[i].uidx.from+1;
1306
if (!(idx[i].uidx.tab=(uchar*) alloc(numchars * sizeof(*idx[i].uidx.tab))))
1309
bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));
1311
for (ch=1; ch < PLANE_SIZE; ch++)
1313
uint16 wc=cs->tab_to_uni[ch];
1314
if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
1316
int ofs= wc - idx[i].uidx.from;
1317
idx[i].uidx.tab[ofs]= ch;
1322
/* Allocate and fill reverse table for each plane */
1324
if (!(cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX)*(n+1))))
1327
for (i=0; i< n; i++)
1328
cs->tab_from_uni[i]= idx[i].uidx;
1330
/* Set end-of-list marker */
1331
bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
1335
static my_bool my_cset_init_8bit(CHARSET_INFO *cs, void *(*alloc)(size_t))
1337
cs->caseup_multiply= 1;
1338
cs->casedn_multiply= 1;
1340
return create_fromuni(cs, alloc);
1343
static void set_max_sort_char(CHARSET_INFO *cs)
1348
if (!cs->sort_order)
1351
max_char=cs->sort_order[(uchar) cs->max_sort_char];
1352
for (i= 0; i < 256; i++)
1354
if ((uchar) cs->sort_order[i] > max_char)
1356
max_char=(uchar) cs->sort_order[i];
1357
cs->max_sort_char= i;
1362
static my_bool my_coll_init_simple(CHARSET_INFO *cs,
1363
void *(*alloc)(size_t) __attribute__((unused)))
1365
set_max_sort_char(cs);
1370
longlong my_strtoll10_8bit(CHARSET_INFO *cs __attribute__((unused)),
1371
const char *nptr, char **endptr, int *error)
1373
return my_strtoll10(nptr, endptr, error);
1377
int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
1378
const uchar *s, const uchar *e)
1383
return MY_CS_TOOSMALL;
1385
*ctype= cs->ctype[*s + 1];
1390
#undef ULONGLONG_MAX
1392
Needed under MetroWerks Compiler, since MetroWerks compiler does not
1393
properly handle a constant expression containing a mod operator
1395
#if defined(__NETWARE__) && defined(__MWERKS__)
1396
static ulonglong ulonglong_max= ~(ulonglong) 0;
1397
#define ULONGLONG_MAX ulonglong_max
1399
#define ULONGLONG_MAX (~(ulonglong) 0)
1400
#endif /* __NETWARE__ && __MWERKS__ */
1403
#define CUTOFF (ULONGLONG_MAX / 10)
1404
#define CUTLIM (ULONGLONG_MAX % 10)
1405
#define DIGITS_IN_ULONGLONG 20
1407
static ulonglong d10[DIGITS_IN_ULONGLONG]=
1424
1000000000000000ULL,
1425
10000000000000000ULL,
1426
100000000000000000ULL,
1427
1000000000000000000ULL,
1428
10000000000000000000ULL
1434
Convert a string to unsigned long long integer value
1438
my_strntoull10rnd_8bit()
1439
cs in pointer to character set
1440
str in pointer to the string to be converted
1441
length in string length
1442
unsigned_flag in whether the number is unsigned
1443
endptr out pointer to the stop character
1444
error out returned error code
1447
This function takes the decimal representation of integer number
1448
from string str and converts it to a signed or unsigned
1449
long long integer value.
1450
Space characters and tab are ignored.
1451
A sign character might precede the digit characters.
1452
The number may have any number of pre-zero digits.
1453
The number may have decimal point and exponent.
1454
Rounding is always done in "away from zero" style:
1458
The function stops reading the string str after "length" bytes
1459
or at the first character that is not a part of correct number syntax:
1461
<signed numeric literal> ::=
1462
[ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
1464
<exact numeric literal> ::=
1465
<unsigned integer> [ <period> [ <unsigned integer> ] ]
1466
| <period> <unsigned integer>
1467
<unsigned integer> ::= <digit>...
1470
Value of string as a signed/unsigned longlong integer
1472
endptr cannot be NULL. The function will store the end pointer
1473
to the stop character here.
1475
The error parameter contains information how things went:
1477
ERANGE If the value of the converted number is out of range
1478
In this case the return value is:
1479
- ULONGLONG_MAX if unsigned_flag and the number was too big
1480
- 0 if unsigned_flag and the number was negative
1481
- LONGLONG_MAX if no unsigned_flag and the number is too big
1482
- LONGLONG_MIN if no unsigned_flag and the number is too big negative
1484
EDOM If the string didn't contain any digits.
1485
In this case the return value is 0.
1489
my_strntoull10rnd_8bit(CHARSET_INFO *cs __attribute__((unused)),
1490
const char *str, size_t length, int unsigned_flag,
1491
char **endptr, int *error)
1493
const char *dot, *end9, *beg, *end= str + length;
1497
int shift= 0, digits= 0, negative, addon;
1499
/* Skip leading spaces and tabs */
1500
for ( ; str < end && (*str == ' ' || *str == '\t') ; str++);
1505
if ((negative= (*str == '-')) || *str=='+') /* optional sign */
1512
end9= (str + 9) > end ? end : (str + 9);
1513
/* Accumulate small number into ulong, for performance purposes */
1514
for (ul= 0 ; str < end9 && (ch= (uchar) (*str - '0')) < 10; str++)
1519
if (str >= end) /* Small number without dots and expanents */
1521
*endptr= (char*) str;
1526
*error= ul ? MY_ERRNO_ERANGE : 0;
1532
return (ulonglong) (longlong) -(long) ul;
1538
return (ulonglong) ul;
1544
/* Continue to accumulate into ulonglong */
1545
for (dot= NULL, ull= ul; str < end; str++)
1547
if ((ch= (uchar) (*str - '0')) < 10)
1549
if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
1556
Adding the next digit would overflow.
1557
Remember the next digit in "addon", for rounding.
1558
Scan all digits with an optional single dot.
1567
addon= (*str >= '5');
1570
for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; shift++, str++);
1571
if (str < end && *str == '.')
1574
for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1580
for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1589
/* The second dot character */
1600
/* Unknown character, exit the loop */
1603
shift= dot ? dot - str : 0; /* Right shift */
1606
exp: /* [ E [ <sign> ] <unsigned integer> ] */
1614
if (str < end && (*str == 'e' || *str == 'E'))
1619
int negative_exp, exponent;
1620
if ((negative_exp= (*str == '-')) || *str=='+')
1626
str < end && (ch= (uchar) (*str - '0')) < 10;
1629
exponent= exponent * 10 + ch;
1631
shift+= negative_exp ? -exponent : exponent;
1635
if (shift == 0) /* No shift, check addon digit */
1639
if (ull == ULONGLONG_MAX)
1646
if (shift < 0) /* Right shift */
1650
if (-shift >= DIGITS_IN_ULONGLONG)
1651
goto ret_zero; /* Exponent is a big negative number, return 0 */
1661
if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
1668
for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
1671
goto ret_too_big; /* Overflow, number too big */
1675
*endptr= (char*) str;
1681
if (ull > (ulonglong) LONGLONG_MIN)
1683
*error= MY_ERRNO_ERANGE;
1684
return (ulonglong) LONGLONG_MIN;
1687
return (ulonglong) -(longlong) ull;
1691
if (ull > (ulonglong) LONGLONG_MAX)
1693
*error= MY_ERRNO_ERANGE;
1694
return (ulonglong) LONGLONG_MAX;
1701
/* Unsigned number */
1702
if (negative && ull)
1704
*error= MY_ERRNO_ERANGE;
1711
*endptr= (char*) str;
1716
*endptr= (char*) str;
1717
*error= MY_ERRNO_EDOM;
1721
*endptr= (char*) str;
1722
*error= MY_ERRNO_ERANGE;
1723
return unsigned_flag ?
1725
negative ? (ulonglong) LONGLONG_MIN : (ulonglong) LONGLONG_MAX;
1730
Check if a constant can be propagated
1733
my_propagate_simple()
1734
cs Character set information
1735
str String to check for safe propagation
1736
length Optional length for string.
1739
Takes the string in the given charset and check
1740
if it can be safely propagated in the optimizer.
1743
s char(5) character set latin1 collate latin1_german2_ci);
1744
insert into t1 values (0xf6); -- o-umlaut
1745
select * from t1 where length(s)=1 and s='oe';
1747
The above query should return one row.
1748
We cannot convert this query into:
1749
select * from t1 where length('oe')=1 and s='oe';
1751
Currently we don't check the constant itself,
1752
and decide not to propagate a constant
1753
just if the collation itself allows tricky things
1754
like expansions and contractions. In the future
1755
we can write more sophisticated functions to
1756
check the constants. For example, 'oa' can always
1757
be safely propagated in German2 because unlike
1758
'oe' it does not have any special meaning.
1761
1 if constant can be safely propagated
1762
0 if it is not safe to propagate the constant
1767
my_bool my_propagate_simple(CHARSET_INFO *cs __attribute__((unused)),
1768
const uchar *str __attribute__((unused)),
1769
size_t length __attribute__((unused)))
1775
my_bool my_propagate_complex(CHARSET_INFO *cs __attribute__((unused)),
1776
const uchar *str __attribute__((unused)),
1777
size_t length __attribute__((unused)))
1783
MY_CHARSET_HANDLER my_charset_8bit_handler=
1786
NULL, /* ismbchar */
1787
my_mbcharlen_8bit, /* mbcharlen */
1790
my_well_formed_len_8bit,
1801
my_long10_to_str_8bit,
1802
my_longlong10_to_str_8bit,
1810
my_strntoull10rnd_8bit,
1814
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
1816
my_coll_init_simple, /* init */
1817
my_strnncoll_simple,
1818
my_strnncollsp_simple,
1820
my_strnxfrmlen_simple,
1821
my_like_range_simple,
1825
my_hash_sort_simple,