~ubuntu-branches/ubuntu/quantal/drizzle/quantal

1 by Monty Taylor
Import upstream version 2010.03.1347
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
1.2.1 by Monty Taylor
Import upstream version 2010.11.03
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
1 by Monty Taylor
Import upstream version 2010.03.1347
15
16
/*
17
  A better implementation of the UNIX ctype(3) library.
18
*/
19
20
#ifndef DRIZZLED_CHARSET_INFO_H
21
#define DRIZZLED_CHARSET_INFO_H
22
23
#include <sys/types.h>
1.2.1 by Monty Taylor
Import upstream version 2010.11.03
24
#include <cstddef>
1 by Monty Taylor
Import upstream version 2010.03.1347
25
26
namespace drizzled
27
{
28
29
/*
  Sizes of the per-charset lookup tables.
  The ctype table has one entry more than the others: the my_is*()
  helpers in this header index it as (ctype + 1)[c].
*/
#define MY_CS_NAME_SIZE			32
#define MY_CS_CTYPE_TABLE_SIZE		257
#define MY_CS_TO_LOWER_TABLE_SIZE	256
#define MY_CS_TO_UPPER_TABLE_SIZE	256
#define MY_CS_SORT_ORDER_TABLE_SIZE	256
#define MY_CS_TO_UNI_TABLE_SIZE		256
35
36
/* Relative directory name (with trailing slash) for charset files. */
#define CHARSET_DIR	"charsets/"
37
38
/* Wide-character value type passed to and from the mb_wc / wc_mb converters. */
#define my_wc_t unsigned long
39
40
/* One case-mapping record: 16-bit upper-case, lower-case and sort values. */
typedef struct unicase_info_st
{
  uint16_t toupper;
  uint16_t tolower;
  uint16_t sort;
} MY_UNICASE_INFO;
46
47
48
/* Case-mapping tables, each an array of 256 pointers to MY_UNICASE_INFO. */
extern MY_UNICASE_INFO *my_unicase_default[256];
extern MY_UNICASE_INFO *my_unicase_turkish[256];
50
51
/* Character-type entry: a single type byte plus a pointer to a type table. */
typedef struct uni_ctype_st
{
  unsigned char  pctype;
  unsigned char  *ctype;
} MY_UNI_CTYPE;
56
57
/* Table of 256 MY_UNI_CTYPE entries; the definition lives outside this header. */
extern MY_UNI_CTYPE my_uni_ctype[256];
58
59
/*
  mb_wc and wc_mb return codes.
  Negative values are parenthesized so the macros stay a single operand
  in any expression they are expanded into.
*/
#define MY_CS_ILSEQ	0     /* Wrong byte sequence: mb_wc                 */
#define MY_CS_ILUNI	0     /* Cannot encode Unicode to charset: wc_mb    */
#define MY_CS_TOOSMALL  (-101)  /* Need at least one byte:    wc_mb and mb_wc */
#define MY_CS_TOOSMALL2 (-102)  /* Need at least two bytes:   wc_mb and mb_wc */
#define MY_CS_TOOSMALL3 (-103)  /* Need at least three bytes: wc_mb and mb_wc */
/* The following three are currently not really used */
#define MY_CS_TOOSMALL4 (-104)  /* Need at least 4 bytes: wc_mb and mb_wc */
#define MY_CS_TOOSMALL5 (-105)  /* Need at least 5 bytes: wc_mb and mb_wc */
#define MY_CS_TOOSMALL6 (-106)  /* Need at least 6 bytes: wc_mb and mb_wc */
1.2.1 by Monty Taylor
Import upstream version 2010.11.03
69
70
/*
  A helper function for "need at least n bytes".

  Returns -(100 + n), i.e. -101 for n == 1, -102 for n == 2, and so on.
*/
inline static int my_cs_toosmalln(int n)
{
  return -100 - n;
}
1 by Monty Taylor
Import upstream version 2010.03.1347
75
76
/* Sequence-type selectors (presumably for the scan() handler's sq argument; confirm against callers). */
#define MY_SEQ_INTTAIL	1
#define MY_SEQ_SPACES	2
78
79
        /* My charsets_list flags (bit values; 4096 is not assigned here) */
#define MY_CS_COMPILED  1      /* compiled-in sets               */
#define MY_CS_CONFIG    2      /* sets that have a *.conf file   */
#define MY_CS_INDEX     4      /* sets listed in the Index file  */
#define MY_CS_LOADED    8      /* sets that are currently loaded */
#define MY_CS_BINSORT	16     /* if binary sort order           */
#define MY_CS_PRIMARY	32     /* if primary collation           */
#define MY_CS_STRNXFRM	64     /* if strnxfrm is used for sort   */
#define MY_CS_UNICODE	128    /* if a charset is full unicode   */
#define MY_CS_READY	256    /* if a charset is initialized    */
#define MY_CS_AVAILABLE	512    /* If either compiled-in or loaded*/
#define MY_CS_CSSORT	1024   /* if case sensitive sort order   */
#define MY_CS_HIDDEN	2048   /* don't display in SHOW          */
#define MY_CS_NONASCII  8192   /* if not ASCII-compatible        */
#define MY_CHARSET_UNDEFINED 0
94
95
/* Flags for strxfrm: one bit per weight level, in the low six bits */
#define MY_STRXFRM_LEVEL1          0x00000001 /* for primary weights   */
#define MY_STRXFRM_LEVEL2          0x00000002 /* for secondary weights */
#define MY_STRXFRM_LEVEL3          0x00000004 /* for tertiary weights  */
#define MY_STRXFRM_LEVEL4          0x00000008 /* fourth level weights  */
#define MY_STRXFRM_LEVEL5          0x00000010 /* fifth level weights   */
#define MY_STRXFRM_LEVEL6          0x00000020 /* sixth level weights   */
#define MY_STRXFRM_LEVEL_ALL       0x0000003F /* Bit OR for the above six */
#define MY_STRXFRM_NLEVELS         6          /* Number of possible levels*/
104
105
/* strxfrm flag bits 6 and 7 */
#define MY_STRXFRM_PAD_WITH_SPACE  0x00000040 /* if pad result with spaces */
#define MY_STRXFRM_UNUSED_00000080 0x00000080 /* for future extensions     */
107
108
/*
  Descending-order flags: the per-level bit (0x01 .. 0x20) shifted left by
  MY_STRXFRM_DESC_SHIFT.  Each level must own exactly one distinct bit;
  level 3 is 0x400 (0x300 would alias LEVEL1 | LEVEL2).
*/
#define MY_STRXFRM_DESC_LEVEL1     0x00000100 /* if desc order for level1 */
#define MY_STRXFRM_DESC_LEVEL2     0x00000200 /* if desc order for level2 */
#define MY_STRXFRM_DESC_LEVEL3     0x00000400 /* if desc order for level3 */
#define MY_STRXFRM_DESC_LEVEL4     0x00000800 /* if desc order for level4 */
#define MY_STRXFRM_DESC_LEVEL5     0x00001000 /* if desc order for level5 */
#define MY_STRXFRM_DESC_LEVEL6     0x00002000 /* if desc order for level6 */
#define MY_STRXFRM_DESC_SHIFT      8
115
116
/* strxfrm flag bits 14 and 15, reserved */
#define MY_STRXFRM_UNUSED_00004000 0x00004000 /* for future extensions     */
#define MY_STRXFRM_UNUSED_00008000 0x00008000 /* for future extensions     */
118
119
/* Reverse-order flags: the per-level bit shifted left by MY_STRXFRM_REVERSE_SHIFT */
#define MY_STRXFRM_REVERSE_LEVEL1  0x00010000 /* if reverse order for level1 */
#define MY_STRXFRM_REVERSE_LEVEL2  0x00020000 /* if reverse order for level2 */
#define MY_STRXFRM_REVERSE_LEVEL3  0x00040000 /* if reverse order for level3 */
#define MY_STRXFRM_REVERSE_LEVEL4  0x00080000 /* if reverse order for level4 */
#define MY_STRXFRM_REVERSE_LEVEL5  0x00100000 /* if reverse order for level5 */
#define MY_STRXFRM_REVERSE_LEVEL6  0x00200000 /* if reverse order for level6 */
#define MY_STRXFRM_REVERSE_SHIFT   16
126
127
128
/* Index entry: a 16-bit [from, to] pair and a pointer to a byte table. */
typedef struct my_uni_idx_st
{
  uint16_t from;
  uint16_t to;
  unsigned char  *tab;
} MY_UNI_IDX;
134
135
/* Match descriptor filled in by the instr() routines: begin, end and mb_len. */
typedef struct
{
  uint32_t beg;
  uint32_t end;
  uint32_t mb_len;
} my_match_t;
141
142
/*
  Lexer state codes.  Values are implicit and consecutive from 0, so the
  order of the enumerators is significant.
*/
enum my_lex_states
{
  MY_LEX_START, MY_LEX_CHAR, MY_LEX_IDENT,
  MY_LEX_IDENT_SEP, MY_LEX_IDENT_START,
  MY_LEX_REAL, MY_LEX_HEX_NUMBER, MY_LEX_BIN_NUMBER,
  MY_LEX_CMP_OP, MY_LEX_LONG_CMP_OP, MY_LEX_STRING, MY_LEX_COMMENT, MY_LEX_END,
  MY_LEX_OPERATOR_OR_IDENT, MY_LEX_NUMBER_IDENT, MY_LEX_INT_OR_REAL,
  MY_LEX_REAL_OR_POINT, MY_LEX_BOOL, MY_LEX_EOL, MY_LEX_ESCAPE,
  MY_LEX_LONG_COMMENT, MY_LEX_END_LONG_COMMENT, MY_LEX_SEMICOLON,
  MY_LEX_SET_VAR, MY_LEX_USER_END, MY_LEX_HOSTNAME, MY_LEX_SKIP,
  MY_LEX_USER_VARIABLE_DELIMITER, MY_LEX_SYSTEM_VAR,
  MY_LEX_IDENT_OR_KEYWORD,
  MY_LEX_IDENT_OR_HEX, MY_LEX_IDENT_OR_BIN,
  MY_LEX_STRING_OR_DELIMITER
};
157
158
struct charset_info_st;
159
160
161
/* See strings/CHARSET_INFO.txt for information about this structure  */
162
typedef struct my_collation_handler_st
163
{
164
  bool (*init)(struct charset_info_st *, void *(*alloc)(size_t));
165
  /* Collation routines */
166
  int     (*strnncoll)(const struct charset_info_st * const,
167
		       const unsigned char *, size_t, const unsigned char *, size_t, bool);
168
  int     (*strnncollsp)(const struct charset_info_st * const,
169
                         const unsigned char *, size_t, const unsigned char *, size_t,
170
                         bool diff_if_only_endspace_difference);
171
  size_t  (*strnxfrm)(const struct charset_info_st * const,
172
                      unsigned char *dst, size_t dstlen, uint32_t nweights,
173
                      const unsigned char *src, size_t srclen, uint32_t flags);
174
  size_t    (*strnxfrmlen)(const struct charset_info_st * const, size_t);
175
  bool (*like_range)(const struct charset_info_st * const,
176
                        const char *s, size_t s_length,
177
                        char escape, char w_one, char w_many,
178
                        size_t res_length,
179
                        char *min_str, char *max_str,
180
                        size_t *min_len, size_t *max_len);
181
  int     (*wildcmp)(const struct charset_info_st * const,
182
  		     const char *str,const char *str_end,
183
                     const char *wildstr,const char *wildend,
184
                     int escape,int w_one, int w_many);
185
186
  int  (*strcasecmp)(const struct charset_info_st * const, const char *, const char *);
187
188
  uint32_t (*instr)(const struct charset_info_st * const,
189
                const char *b, size_t b_length,
190
                const char *s, size_t s_length,
191
                my_match_t *match, uint32_t nmatch);
192
193
  /* Hash calculation */
194
  void (*hash_sort)(const struct charset_info_st *cs, const unsigned char *key, size_t len,
195
                    uint32_t *nr1, uint32_t *nr2);
196
  bool (*propagate)(const struct charset_info_st *cs, const unsigned char *str, size_t len);
197
} MY_COLLATION_HANDLER;
198
199
/* Collation handler tables defined outside this header */
extern MY_COLLATION_HANDLER my_collation_mb_bin_handler;
extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
202
203
/* Some typedef to make it easy for C++ to make function pointers */
204
typedef int (*my_charset_conv_mb_wc)(const struct charset_info_st * const, my_wc_t *,
205
                                     const unsigned char *, const unsigned char *);
206
typedef int (*my_charset_conv_wc_mb)(const struct charset_info_st * const, my_wc_t,
207
                                     unsigned char *, unsigned char *);
208
typedef size_t (*my_charset_conv_case)(const struct charset_info_st * const,
209
                                       char *, size_t, char *, size_t);
210
211
212
/* See strings/CHARSET_INFO.txt about information on this structure  */
213
typedef struct my_charset_handler_st
214
{
215
  bool (*init)(struct charset_info_st *, void *(*alloc)(size_t));
216
  /* Multibyte routines */
217
  uint32_t    (*ismbchar)(const struct charset_info_st * const, const char *, const char *);
218
  uint32_t    (*mbcharlen)(const struct charset_info_st * const, uint32_t c);
219
  size_t  (*numchars)(const struct charset_info_st * const, const char *b, const char *e);
220
  size_t  (*charpos)(const struct charset_info_st * const, const char *b, const char *e,
221
                     size_t pos);
222
  size_t  (*well_formed_len)(const struct charset_info_st * const,
223
                             const char *b,const char *e,
224
                             size_t nchars, int *error);
225
  size_t  (*lengthsp)(const struct charset_info_st * const, const char *ptr, size_t length);
226
  size_t  (*numcells)(const struct charset_info_st * const, const char *b, const char *e);
227
228
  /* Unicode conversion */
229
  my_charset_conv_mb_wc mb_wc;
230
  my_charset_conv_wc_mb wc_mb;
231
232
  /* CTYPE scanner */
233
  int (*ctype)(const struct charset_info_st *cs, int *ctype,
234
               const unsigned char *s, const unsigned char *e);
235
236
  /* Functions for case and sort conversion */
237
  size_t  (*caseup_str)(const struct charset_info_st * const, char *);
238
  size_t  (*casedn_str)(const struct charset_info_st * const, char *);
239
240
  my_charset_conv_case caseup;
241
  my_charset_conv_case casedn;
242
243
  /* Charset dependant snprintf() */
244
  size_t (*snprintf)(const struct charset_info_st * const, char *to, size_t n,
245
                     const char *fmt,
1.2.1 by Monty Taylor
Import upstream version 2010.11.03
246
                     ...)
247
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
248
                         __attribute__((format(printf, 4, 5)))
249
#endif
250
                         ;
1 by Monty Taylor
Import upstream version 2010.03.1347
251
  size_t (*long10_to_str)(const struct charset_info_st * const, char *to, size_t n,
252
                          int radix, long int val);
253
  size_t (*int64_t10_to_str)(const struct charset_info_st * const, char *to, size_t n,
254
                              int radix, int64_t val);
255
256
  void (*fill)(const struct charset_info_st * const, char *to, size_t len, int fill);
257
258
  /* String-to-number conversion routines */
259
  long        (*strntol)(const struct charset_info_st * const, const char *s, size_t l,
260
			 int base, char **e, int *err);
261
  unsigned long      (*strntoul)(const struct charset_info_st * const, const char *s, size_t l,
262
			 int base, char **e, int *err);
263
  int64_t   (*strntoll)(const struct charset_info_st * const, const char *s, size_t l,
264
			 int base, char **e, int *err);
265
  uint64_t (*strntoull)(const struct charset_info_st * const, const char *s, size_t l,
266
			 int base, char **e, int *err);
267
  double      (*strntod)(const struct charset_info_st * const, char *s, size_t l, char **e,
268
			 int *err);
269
  int64_t    (*strtoll10)(const struct charset_info_st *cs,
270
                           const char *nptr, char **endptr, int *error);
271
  uint64_t   (*strntoull10rnd)(const struct charset_info_st *cs,
272
                                const char *str, size_t length,
273
                                int unsigned_fl,
274
                                char **endptr, int *error);
275
  size_t        (*scan)(const struct charset_info_st * const, const char *b, const char *e,
276
                        int sq);
277
} MY_CHARSET_HANDLER;
278
279
/* Charset handler tables defined outside this header */
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
281
282
283
/* See strings/CHARSET_INFO.txt about information on this structure  */
284
typedef struct charset_info_st
285
{
286
  uint32_t      number;
287
  uint32_t      primary_number;
288
  uint32_t      binary_number;
289
  uint32_t      state;
290
  const char *csname;
291
  const char *name;
292
  const char *comment;
293
  const char *tailoring;
294
  unsigned char    *ctype;
295
  unsigned char    *to_lower;
296
  unsigned char    *to_upper;
297
  unsigned char    *sort_order;
298
  uint16_t   *contractions;
299
  uint16_t   **sort_order_big;
300
  uint16_t      *tab_to_uni;
301
  MY_UNI_IDX  *tab_from_uni;
302
  MY_UNICASE_INFO **caseinfo;
303
  unsigned char     *state_map;
304
  unsigned char     *ident_map;
305
  uint32_t      strxfrm_multiply;
306
  unsigned char     caseup_multiply;
307
  unsigned char     casedn_multiply;
308
  uint32_t      mbminlen;
309
  uint32_t      mbmaxlen;
310
  uint16_t    min_sort_char;
311
  uint16_t    max_sort_char; /* For LIKE optimization */
312
  unsigned char     pad_char;
313
  bool   escape_with_backslash_is_dangerous;
314
  unsigned char     levels_for_compare;
315
  unsigned char     levels_for_order;
316
317
  MY_CHARSET_HANDLER *cset;
318
  MY_COLLATION_HANDLER *coll;
319
320
} CHARSET_INFO;
321
322
/* Sentinel charset number: the largest uint32_t value. */
#define ILLEGAL_CHARSET_INFO_NUMBER (UINT32_MAX)
323
324
325
/* Charset descriptors defined outside this header */
extern CHARSET_INFO my_charset_bin;
extern CHARSET_INFO my_charset_utf8mb4_bin;
extern CHARSET_INFO my_charset_utf8mb4_general_ci;
extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
329
330
/*
  The utf8mb4 charset is exposed under the name "utf8"; the utf8 symbol
  names below are aliases for the utf8mb4 descriptors.
*/
#define MY_UTF8MB4                 "utf8"
#define my_charset_utf8_general_ci ::drizzled::my_charset_utf8mb4_general_ci
#define my_charset_utf8_bin        ::drizzled::my_charset_utf8mb4_bin
333
334
335
/* declarations for simple charsets */
336
size_t my_strnxfrmlen_simple(const CHARSET_INFO * const, size_t);
337
338
extern int  my_strnncollsp_simple(const CHARSET_INFO * const, const unsigned char *, size_t,
339
                                  const unsigned char *, size_t,
340
                                  bool diff_if_only_endspace_difference);
341
342
extern size_t my_lengthsp_8bit(const CHARSET_INFO * const cs, const char *ptr, size_t length);
343
344
extern uint32_t my_instr_simple(const CHARSET_INFO * const,
345
                            const char *b, size_t b_length,
346
                            const char *s, size_t s_length,
347
                            my_match_t *match, uint32_t nmatch);
348
349
350
/* Functions for 8bit */
351
int my_mb_ctype_8bit(const CHARSET_INFO * const,int *, const unsigned char *,const unsigned char *);
352
int my_mb_ctype_mb(const CHARSET_INFO * const,int *, const unsigned char *,const unsigned char *);
353
354
size_t my_scan_8bit(const CHARSET_INFO * const cs, const char *b, const char *e, int sq);
355
356
/*
  8-bit snprintf().  The printf format attribute (format string is
  argument 4, variadic arguments start at 5) is applied under the same
  compiler guard as the snprintf slot of MY_CHARSET_HANDLER, so compilers
  without __attribute__ support can still parse this header.
*/
size_t my_snprintf_8bit(const CHARSET_INFO * const, char *to, size_t n,
                        const char *fmt, ...)
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
  __attribute__((format(printf, 4, 5)))
#endif
  ;
359
360
long       my_strntol_8bit(const CHARSET_INFO * const, const char *s, size_t l, int base,
361
                           char **e, int *err);
362
unsigned long      my_strntoul_8bit(const CHARSET_INFO * const, const char *s, size_t l, int base,
363
			    char **e, int *err);
364
int64_t   my_strntoll_8bit(const CHARSET_INFO * const, const char *s, size_t l, int base,
365
			    char **e, int *err);
366
uint64_t my_strntoull_8bit(const CHARSET_INFO * const, const char *s, size_t l, int base,
367
			    char **e, int *err);
368
double      my_strntod_8bit(const CHARSET_INFO * const, char *s, size_t l,char **e,
369
			    int *err);
370
size_t my_long10_to_str_8bit(const CHARSET_INFO * const, char *to, size_t l, int radix,
371
                             long int val);
372
size_t my_int64_t10_to_str_8bit(const CHARSET_INFO * const, char *to, size_t l, int radix,
373
                                 int64_t val);
374
375
int64_t my_strtoll10_8bit(const CHARSET_INFO * const cs,
376
                           const char *nptr, char **endptr, int *error);
377
int64_t my_strtoll10_ucs2(CHARSET_INFO *cs,
378
                           const char *nptr, char **endptr, int *error);
379
380
uint64_t my_strntoull10rnd_8bit(const CHARSET_INFO * const cs,
381
                                 const char *str, size_t length, int
382
                                 unsigned_fl, char **endptr, int *error);
383
uint64_t my_strntoull10rnd_ucs2(CHARSET_INFO *cs,
384
                                 const char *str, size_t length,
385
                                 int unsigned_fl, char **endptr, int *error);
386
387
void my_fill_8bit(const CHARSET_INFO * const cs, char* to, size_t l, int fill);
388
389
/*
  LIKE range builders.  All share the signature of the like_range slot in
  MY_COLLATION_HANDLER: pattern (ptr, ptr_length), the escape / one-char /
  many-char wildcard characters, the result length, and output buffers
  min_str / max_str with their lengths.
*/
bool  my_like_range_simple(const CHARSET_INFO * const cs,
                              const char *ptr, size_t ptr_length,
                              char escape, char w_one, char w_many,
                              size_t res_length,
                              char *min_str, char *max_str,
                              size_t *min_length, size_t *max_length);

bool  my_like_range_mb(const CHARSET_INFO * const cs,
                          const char *ptr, size_t ptr_length,
                          char escape, char w_one, char w_many,
                          size_t res_length,
                          char *min_str, char *max_str,
                          size_t *min_length, size_t *max_length);

bool  my_like_range_ucs2(const CHARSET_INFO * const cs,
                            const char *ptr, size_t ptr_length,
                            char escape, char w_one, char w_many,
                            size_t res_length,
                            char *min_str, char *max_str,
                            size_t *min_length, size_t *max_length);

bool  my_like_range_utf16(const CHARSET_INFO * const cs,
                             const char *ptr, size_t ptr_length,
                             char escape, char w_one, char w_many,
                             size_t res_length,
                             char *min_str, char *max_str,
                             size_t *min_length, size_t *max_length);

bool  my_like_range_utf32(const CHARSET_INFO * const cs,
                             const char *ptr, size_t ptr_length,
                             char escape, char w_one, char w_many,
                             size_t res_length,
                             char *min_str, char *max_str,
                             size_t *min_length, size_t *max_length);
423
424
425
/* Wildcard comparison, 8-bit and binary variants (wildcmp handler slot signature) */
int my_wildcmp_8bit(const CHARSET_INFO * const,
                    const char *str,const char *str_end,
                    const char *wildstr,const char *wildend,
                    int escape, int w_one, int w_many);

int my_wildcmp_bin(const CHARSET_INFO * const,
                   const char *str,const char *str_end,
                   const char *wildstr,const char *wildend,
                   int escape, int w_one, int w_many);
434
435
size_t my_numchars_8bit(const CHARSET_INFO * const, const char *b, const char *e);
436
size_t my_numcells_8bit(const CHARSET_INFO * const, const char *b, const char *e);
437
size_t my_charpos_8bit(const CHARSET_INFO * const, const char *b, const char *e, size_t pos);
438
size_t my_well_formed_len_8bit(const CHARSET_INFO * const, const char *b, const char *e,
439
                             size_t pos, int *error);
440
typedef  void *(*cs_alloc_func)(size_t);
441
bool my_coll_init_simple(CHARSET_INFO *cs, cs_alloc_func alloc);
442
bool my_cset_init_8bit(CHARSET_INFO *cs, cs_alloc_func alloc);
443
uint32_t my_mbcharlen_8bit(const CHARSET_INFO * const, uint32_t c);
444
445
446
/* Functions for multibyte charsets */
447
extern size_t my_caseup_str_mb(const CHARSET_INFO * const, char *);
448
extern size_t my_casedn_str_mb(const CHARSET_INFO * const, char *);
449
extern size_t my_caseup_mb(const CHARSET_INFO * const, char *src, size_t srclen,
450
                                         char *dst, size_t dstlen);
451
extern size_t my_casedn_mb(const CHARSET_INFO * const, char *src, size_t srclen,
452
                                         char *dst, size_t dstlen);
453
extern int my_strcasecmp_mb(const CHARSET_INFO * const  cs, const char *s, const char *t);
454
455
int my_wildcmp_mb(const CHARSET_INFO * const,
456
		  const char *str,const char *str_end,
457
		  const char *wildstr,const char *wildend,
458
		  int escape, int w_one, int w_many);
459
size_t my_numchars_mb(const CHARSET_INFO * const, const char *b, const char *e);
460
size_t my_numcells_mb(const CHARSET_INFO * const, const char *b, const char *e);
461
size_t my_charpos_mb(const CHARSET_INFO * const, const char *b, const char *e, size_t pos);
462
size_t my_well_formed_len_mb(const CHARSET_INFO * const, const char *b, const char *e,
463
                             size_t pos, int *error);
464
uint32_t my_instr_mb(const CHARSET_INFO * const,
465
                 const char *b, size_t b_length,
466
                 const char *s, size_t s_length,
467
                 my_match_t *match, uint32_t nmatch);
468
469
/* Multibyte binary collation routines */
int my_strnncoll_mb_bin(const CHARSET_INFO * const  cs,
                        const unsigned char *s, size_t slen,
                        const unsigned char *t, size_t tlen,
                        bool t_is_prefix);

int my_strnncollsp_mb_bin(const CHARSET_INFO * const cs,
                          const unsigned char *a, size_t a_length,
                          const unsigned char *b, size_t b_length,
                          bool diff_if_only_endspace_difference);

int my_wildcmp_mb_bin(const CHARSET_INFO * const cs,
                      const char *str,const char *str_end,
                      const char *wildstr,const char *wildend,
                      int escape, int w_one, int w_many);

int my_strcasecmp_mb_bin(const CHARSET_INFO * const, const char *s, const char *t);

void my_hash_sort_mb_bin(const CHARSET_INFO * const,
                         const unsigned char *key, size_t len, uint32_t *nr1, uint32_t *nr2);

size_t my_strnxfrm_mb(const CHARSET_INFO * const,
                      unsigned char *dst, size_t dstlen, uint32_t nweights,
                      const unsigned char *src, size_t srclen, uint32_t flags);
492
493
int my_wildcmp_unicode(const CHARSET_INFO * const cs,
494
                       const char *str, const char *str_end,
495
                       const char *wildstr, const char *wildend,
496
                       int escape, int w_one, int w_many,
497
                       MY_UNICASE_INFO **weights);
498
499
extern bool my_parse_charset_xml(const char *bug, size_t len,
500
				    int (*add)(CHARSET_INFO *cs));
501
502
bool my_propagate_simple(const CHARSET_INFO * const cs, const unsigned char *str, size_t len);
503
bool my_propagate_complex(const CHARSET_INFO * const cs, const unsigned char *str, size_t len);
504
505
506
uint32_t my_strxfrm_flag_normalize(uint32_t flags, uint32_t nlevels);
507
void my_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *strend,
508
                                 uint32_t flags, uint32_t level);
509
size_t my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO * const cs,
510
                                       unsigned char *str, unsigned char *frmend, unsigned char *strend,
511
                                       uint32_t nweights, uint32_t flags, uint32_t level);
512
513
bool my_charset_is_ascii_compatible(const CHARSET_INFO * const cs);
514
515
/*
  Compare 0-terminated UTF8 strings.

  SYNOPSIS
    my_strcasecmp_utf8mb3()
    cs                  character set handler
    s                   First 0-terminated string to compare
    t                   Second 0-terminated string to compare

  IMPLEMENTATION

  RETURN
    - negative number if s < t
    - positive number if s > t
    - 0 if the strings are equal

  NOTE(review): this synopsis names my_strcasecmp_utf8mb3(), but the
  declarations that follow it are my_wc_mb_filename() and
  my_mb_wc_filename(); the comment looks stale or misplaced.
*/
531
int
532
my_wc_mb_filename(const CHARSET_INFO * const,
533
                  my_wc_t wc, unsigned char *s, unsigned char *e);
534
535
int
536
my_mb_wc_filename(const CHARSET_INFO * const,
537
                  my_wc_t *pwc, const unsigned char *s, const unsigned char *e);
538
539
540
unsigned int my_ismbchar_utf8mb4(const CHARSET_INFO * const cs,                                  const char *b, const char *e);
541
unsigned int my_mbcharlen_utf8mb4(const CHARSET_INFO * const, uint32_t c);
542
543
size_t my_strnxfrmlen_utf8mb4(const CHARSET_INFO * const, size_t len);
544
size_t
545
my_strnxfrm_utf8mb4(const CHARSET_INFO * const cs,
546
                    unsigned char *dst, size_t dstlen, uint32_t nweights,
547
                    const unsigned char *src, size_t srclen, uint32_t flags);
548
549
int
550
my_wildcmp_utf8mb4(const CHARSET_INFO * const cs,
551
                   const char *str, const char *strend,
552
                   const char *wildstr, const char *wildend,
553
                   int escape, int w_one, int w_many);
554
int
555
my_strnncollsp_utf8mb4(const CHARSET_INFO * const cs,
556
                       const unsigned char *s, size_t slen,
557
                       const unsigned char *t, size_t tlen,
558
                       bool diff_if_only_endspace_difference);
559
int my_strcasecmp_utf8mb4(const CHARSET_INFO * const cs,
560
                          const char *s, const char *t);
561
562
int
563
my_strnncoll_utf8mb4(const CHARSET_INFO * const cs,
564
                     const unsigned char *s, size_t slen,
565
                     const unsigned char *t, size_t tlen,
566
                     bool t_is_prefix);
567
568
int
569
my_mb_wc_utf8mb4(const CHARSET_INFO * const cs,
570
                 my_wc_t * pwc, const unsigned char *s, const unsigned char *e);
571
572
int
573
my_wc_mb_utf8mb4(const CHARSET_INFO * const cs,
574
                 my_wc_t wc, unsigned char *r, unsigned char *e);
575
576
size_t my_caseup_str_utf8mb4(const CHARSET_INFO * const cs, char *src);
577
size_t my_casedn_str_utf8mb4(const CHARSET_INFO * const cs, char *src);
578
579
size_t
580
my_caseup_utf8mb4(const CHARSET_INFO * const cs, char *src, size_t srclen,
581
                  char *dst, size_t dstlen);
582
size_t
583
my_casedn_utf8mb4(const CHARSET_INFO * const cs,
584
                  char *src, size_t srclen,
585
                  char *dst, size_t dstlen);
586
587
588
bool my_coll_init_uca(CHARSET_INFO *cs, cs_alloc_func alloc);
589
590
int my_strnncoll_any_uca(const CHARSET_INFO * const cs,
591
                         const unsigned char *s, size_t slen,
592
                         const unsigned char *t, size_t tlen,
593
                         bool t_is_prefix);
594
595
int my_strnncollsp_any_uca(const CHARSET_INFO * const cs,
596
                           const unsigned char *s, size_t slen,
597
                           const unsigned char *t, size_t tlen,
598
                           bool diff_if_only_endspace_difference);
599
600
void my_hash_sort_any_uca(const CHARSET_INFO * const cs,
601
                          const unsigned char *s, size_t slen,
602
                          uint32_t *n1, uint32_t *n2);
603
604
size_t my_strnxfrm_any_uca(const CHARSET_INFO * const cs,
605
                           unsigned char *dst, size_t dstlen, uint32_t nweights,
606
                           const unsigned char *src, size_t srclen,
607
                           uint32_t flags);
608
609
int my_wildcmp_uca(const CHARSET_INFO * const cs,
610
                   const char *str,const char *str_end,
611
                   const char *wildstr,const char *wildend,
612
                   int escape, int w_one, int w_many);
613
614
int my_strnncoll_8bit_bin(const CHARSET_INFO * const,
615
                          const unsigned char *s, size_t slen,
616
                          const unsigned char *t, size_t tlen,
617
                          bool t_is_prefix);
618
int my_strnncollsp_8bit_bin(const CHARSET_INFO * const,
619
                            const unsigned char *a, size_t a_length,
620
                            const unsigned char *b, size_t b_length,
621
                            bool diff_if_only_endspace_difference);
622
size_t my_case_str_bin(const CHARSET_INFO * const, char *);
623
size_t my_case_bin(const CHARSET_INFO * const, char *,
624
                   size_t srclen, char *, size_t);
625
int my_strcasecmp_bin(const CHARSET_INFO * const,
626
                      const char *s, const char *t);
627
size_t
628
my_strnxfrm_8bit_bin(const CHARSET_INFO * const cs,
629
                     unsigned char * dst, size_t dstlen, uint32_t nweights,
630
                     const unsigned char *src, size_t srclen, uint32_t flags);
631
uint32_t my_instr_bin(const CHARSET_INFO * const,
632
                      const char *b, size_t b_length,
633
                      const char *s, size_t s_length,
634
                      my_match_t *match, uint32_t nmatch);
635
size_t my_lengthsp_binary(const CHARSET_INFO * const,
636
                          const char *, size_t length);
637
int my_mb_wc_bin(const CHARSET_INFO * const,
638
                 my_wc_t *wc, const unsigned char *str,
639
                 const unsigned char *end);
640
int my_wc_mb_bin(const CHARSET_INFO * const, my_wc_t wc,
641
                 unsigned char *str, unsigned char *end);
642
void my_hash_sort_8bit_bin(const CHARSET_INFO * const,
643
                           const unsigned char *key, size_t len,
644
                           uint32_t *nr1, uint32_t *nr2);
645
bool my_coll_init_8bit_bin(CHARSET_INFO *cs,
646
                           cs_alloc_func);
647
int my_strnncoll_binary(const CHARSET_INFO * const,
648
                        const unsigned char *s, size_t slen,
649
                        const unsigned char *t, size_t tlen,
650
                        bool t_is_prefix);
651
int my_strnncollsp_binary(const CHARSET_INFO * const cs,
652
                          const unsigned char *s, size_t slen,
653
                          const unsigned char *t, size_t tlen,
654
                          bool);
655
656
657
/*
  Character-class bits stored in the ctype table (octal, one bit each).
  NOTE(review): identifiers beginning with underscore + uppercase are
  reserved for the implementation; renaming would break existing users,
  so the names are kept.
*/
#define	_MY_U	01	/* Upper case */
#define	_MY_L	02	/* Lower case */
#define	_MY_NMR	04	/* Numeral (digit) */
#define	_MY_SPC	010	/* Spacing character */
#define	_MY_PNT	020	/* Punctuation */
#define	_MY_CTR	040	/* Control character */
#define	_MY_B	0100	/* Blank */
#define	_MY_X	0200	/* heXadecimal digit */
665
666
1.2.1 by Monty Taylor
Import upstream version 2010.11.03
667
/*
  Return true when c has no bits set outside the low seven (0177),
  i.e. its value lies in 0..127.
*/
inline static bool my_isascii(char c)
{
  return (c & ~0177) == 0;
}
671
672
/* Clear every bit of c above the low seven (mask 0177). */
inline static char my_toascii(char c)
{
  return (c & 0177);
}
676
677
/* Keep only the low five bits of c (mask 31), e.g. 'A' -> 1. */
inline static char my_tocntrl(char c)
{
  return (c & 31);
}
681
682
/* Set bit 6 (value 64) of c, e.g. 1 -> 'A'. */
inline static char my_toprint(char c)
{
  return (c | 64);
}
686
687
inline static char my_toupper(const charset_info_st *s, char c)
688
{
689
  return (char)(s->to_upper[(unsigned char)c]);
690
}
691
692
inline static char my_tolower(const charset_info_st *s, char c)
693
{
694
  return (char)(s->to_lower[(unsigned char)c]);
695
}
696
697
inline static bool my_isalpha(const charset_info_st *s, char c)
698
{
699
  return ((s->ctype+1)[(unsigned char)c] & (_MY_U | _MY_L));
700
}
701
702
inline static bool my_isupper(const charset_info_st *s, char c)
703
{
704
  return ((s->ctype+1)[(unsigned char)c] & _MY_U);
705
}
706
707
inline static bool my_islower(const charset_info_st *s, char c)
708
{
709
  return ((s->ctype+1)[(unsigned char)c] & _MY_L);
710
}
711
712
inline static bool my_isdigit(const charset_info_st *s, char c)
713
{
714
  return ((s->ctype+1)[(unsigned char)c] & _MY_NMR);
715
}
716
717
inline static bool my_isxdigit(const charset_info_st *s, char c)
718
{
719
  return ((s->ctype+1)[(unsigned char)c] & _MY_X);
720
}
721
722
inline static bool my_isalnum(const charset_info_st *s, char c) 
723
{
724
  return ((s->ctype+1)[(unsigned char)c] & (_MY_U | _MY_L | _MY_NMR));
725
}
726
727
inline static bool my_isspace(const charset_info_st *s, char c)
728
{
729
  return ((s->ctype+1)[(unsigned char)c] & _MY_SPC);
730
}
731
732
inline static bool my_ispunct(const charset_info_st *s, char c)  
733
{
734
  return ((s->ctype+1)[(unsigned char)c] & _MY_PNT);
735
}
736
737
inline static bool my_isprint(const charset_info_st *s, char c)  
738
{
739
  return ((s->ctype+1)[(unsigned char)c] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B));
740
}
741
742
inline static bool my_isgraph(const charset_info_st *s, char c)
743
{
744
  return ((s->ctype+1)[(unsigned char)c] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR));
745
}
746
747
inline static bool my_iscntrl(const charset_info_st *s, char c)  
748
{
749
  return ((s->ctype+1)[(unsigned char)c] & _MY_CTR);
750
}
1 by Monty Taylor
Import upstream version 2010.03.1347
751
752
/* Some helper functions that should be cleaned up a little */
1.2.1 by Monty Taylor
Import upstream version 2010.11.03
753
inline static bool my_isvar(const charset_info_st *s, char c)
754
{
755
  return (my_isalnum(s,c) || (c) == '_');
756
}
757
758
inline static bool my_isvar_start(const charset_info_st *s, char c)
759
{
760
  return (my_isalpha(s,c) || (c) == '_');
761
}
762
763
inline static bool my_binary_compare(const charset_info_st *s)
764
{
765
  return (s->state  & MY_CS_BINSORT);
766
}
767
768
inline static bool use_strnxfrm(const charset_info_st *s)
769
{
770
  return (s->state & MY_CS_STRNXFRM);
771
}
772
773
inline static size_t my_strnxfrm(const charset_info_st *cs, 
774
                                 unsigned char *dst, 
775
                                 const size_t dstlen, 
776
                                 const unsigned char *src, 
777
                                 const uint32_t srclen)
778
{
779
  return (cs->coll->strnxfrm(cs, dst, dstlen, dstlen, src, srclen, MY_STRXFRM_PAD_WITH_SPACE));
780
}
781
782
inline static int my_strnncoll(const charset_info_st *cs, 
783
                               const unsigned char *s, 
784
                               const size_t slen, 
785
                               const unsigned char *t,
786
                               const size_t tlen) 
787
{
788
  return (cs->coll->strnncoll(cs, s, slen, t, tlen, 0));
789
}
790
791
inline static bool my_like_range(const charset_info_st *cs,
792
                                 const char *ptr, const size_t ptrlen,
793
                                 const char escape, 
794
                                 const char w_one,
795
                                 const char w_many, 
796
                                 const size_t reslen, 
797
                                 char *minstr, char *maxstr, 
798
                                 size_t *minlen, size_t *maxlen)
799
{
800
  return (cs->coll->like_range(cs, ptr, ptrlen, escape, w_one, w_many, reslen, 
801
                               minstr, maxstr, minlen, maxlen));
802
}
803
804
inline static int my_wildcmp(const charset_info_st *cs,
805
                             const char *str, const char *strend,
806
                             const char *w_str, const char *w_strend,
807
                             const int escape,
808
                             const int w_one, const int w_many) 
809
{
810
  return (cs->coll->wildcmp(cs, str, strend, w_str, w_strend, escape, w_one, w_many));
811
}
812
813
inline static int my_strcasecmp(const charset_info_st *cs, const char *s, const char *t)
814
{
815
  return (cs->coll->strcasecmp(cs, s, t));
816
}
817
818
template <typename CHAR_T>
819
inline static size_t my_charpos(const charset_info_st *cs, 
820
                                const CHAR_T *b, const CHAR_T* e, size_t num)
821
{
822
  return (cs->cset->charpos(cs, (const char*) b, (const char *)e, num));
823
}
824
825
inline static bool use_mb(const charset_info_st *cs)
826
{
827
  return (cs->cset->ismbchar != NULL);
828
}
829
830
inline static unsigned int  my_ismbchar(const charset_info_st *cs, const char *a, const char *b)
831
{
832
  return (cs->cset->ismbchar(cs, a, b));
833
}
834
835
inline static unsigned int my_mbcharlen(const charset_info_st *cs, uint32_t c)
836
{
837
  return (cs->cset->mbcharlen(cs, c));
838
}
839
840
841
inline static size_t my_caseup_str(const charset_info_st *cs, char *src)
842
{
843
  return (cs->cset->caseup_str(cs, src));
844
}
845
846
inline static size_t my_casedn_str(const charset_info_st *cs, char *src)
847
{
848
  return (cs->cset->casedn_str(cs, src));
849
}
850
851
inline static long my_strntol(const charset_info_st *cs, 
852
                              const char* s, const size_t l, const int base, char **e, int *err)
853
{
854
  return (cs->cset->strntol(cs, s, l, base, e, err));
855
}
856
857
inline static unsigned long my_strntoul(const charset_info_st *cs, 
858
                                        const char* s, const size_t l, const int base, 
859
                                        char **e, int *err)
860
{
861
  return (cs->cset->strntoul(cs, s, l, base, e, err));
862
}
863
864
inline static int64_t my_strntoll(const charset_info_st *cs, 
865
                                 const char* s, const size_t l, const int base, char **e, int *err)
866
{
867
  return (cs->cset->strntoll(cs, s, l, base, e, err));
868
}
869
870
inline static int64_t my_strntoull(const charset_info_st *cs, 
871
                                   const char* s, const size_t l, const int base, 
872
                                   char **e, int *err)
873
{
874
  return (cs->cset->strntoull(cs, s, l, base, e, err));
875
}
876
877
878
inline static double my_strntod(const charset_info_st *cs, 
879
                                char* s, const size_t l, char **e, int *err)
880
{
881
  return (cs->cset->strntod(cs, s, l, e, err));
882
}
1 by Monty Taylor
Import upstream version 2010.03.1347
883
884
/*
  Declared here, defined elsewhere.
  NOTE(review): presumably turns the string `escape` into an escape
  character code for charset `cs` -- confirm against the definition.
*/
int make_escape_code(const CHARSET_INFO * const cs, const char *escape);
885
886
} /* namespace drizzled */
887
888
#endif /* DRIZZLED_CHARSET_INFO_H */