1
/* Copyright (C) 2000 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
18
#include "drizzled/charset.h"
19
#include "drizzled/error.h"
20
#include "drizzled/charset_info.h"
21
#include "drizzled/internal/m_string.h"
22
#include <drizzled/configmake.h>
31
/*
  We collect every allocation made for charset data in this vector so that
  it can be released when the charsets are freed.
*/
33
/* Pointers are appended by cs_alloc() and taken off again by free_charsets(). */
static vector<void *>memory_vector;
36
/*
  The code below implements this functionality:

    - Initializing charset related structures
    - Loading dynamic charsets
    - Searching for a proper CHARSET_INFO
      using charset name, collation name or collation ID
    - Setting server default character set
*/
45
bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2)
47
return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname));
52
/*
  Scan all_charsets for an entry whose name matches the given collation
  name; names are compared with my_strcasecmp() using
  my_charset_utf8_general_ci.

  NOTE(review): this view of the function is incomplete -- the return type,
  the declaration of cs, the loop increment, the return statements and the
  braces are not visible, and the bare numeric lines look like extraction
  residue. Confirm against the pristine file before changing the logic.
*/
get_collation_number_internal(const char *name)
55
for (cs= all_charsets;
56
cs < all_charsets+array_elements(all_charsets)-1 ;
59
/* Only populated slots that carry a collation name are compared. */
if ( cs[0] && cs[0]->name &&
60
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name))
67
/*
  Build the two 256-entry lexer lookup tables of a charset:

    cs->state_map  one MY_LEX_* state per byte value
    cs->ident_map  non-zero for byte values whose state is MY_LEX_IDENT
                   or MY_LEX_NUMBER_IDENT

  Both tables are obtained from cs_alloc(256).

  NOTE(review): parts of this function are not visible in this view (the
  declaration of i, the bodies of the two allocation checks, the first and
  last conditions of the classification chain, the final return, all
  braces), and the bare numeric lines look like extraction residue.
  Confirm against the pristine file before changing the logic.
*/
static bool init_state_maps(CHARSET_INFO *cs)
70
unsigned char *state_map;
71
unsigned char *ident_map;
73
/* A zero result from cs_alloc() is treated as failure for either table. */
if (!(cs->state_map= (unsigned char*) cs_alloc(256)))
76
if (!(cs->ident_map= (unsigned char*) cs_alloc(256)))
79
/* Local aliases for the freshly allocated tables. */
state_map= cs->state_map;
80
ident_map= cs->ident_map;
82
/* Fill state_map with states to get a faster parser */
83
for (i=0; i < 256 ; i++)
86
state_map[i]=(unsigned char) MY_LEX_IDENT;
87
else if (my_isdigit(cs,i))
88
state_map[i]=(unsigned char) MY_LEX_NUMBER_IDENT;
89
/* A byte for which my_mbcharlen() reports more than 1 is classified as an identifier byte. */
else if (my_mbcharlen(cs, i)>1)
90
state_map[i]=(unsigned char) MY_LEX_IDENT;
91
else if (my_isspace(cs,i))
92
state_map[i]=(unsigned char) MY_LEX_SKIP;
94
state_map[i]=(unsigned char) MY_LEX_CHAR;
96
/*
  Fixed assignments for individual ASCII characters and for byte 0; they
  overwrite whatever the classification loop stored for those entries.
*/
state_map[(unsigned char)'_']=state_map[(unsigned char)'$']=(unsigned char) MY_LEX_IDENT;
97
state_map[(unsigned char)'\'']=(unsigned char) MY_LEX_STRING;
98
state_map[(unsigned char)'.']=(unsigned char) MY_LEX_REAL_OR_POINT;
99
state_map[(unsigned char)'>']=state_map[(unsigned char)'=']=state_map[(unsigned char)'!']= (unsigned char) MY_LEX_CMP_OP;
100
state_map[(unsigned char)'<']= (unsigned char) MY_LEX_LONG_CMP_OP;
101
state_map[(unsigned char)'&']=state_map[(unsigned char)'|']=(unsigned char) MY_LEX_BOOL;
102
state_map[(unsigned char)'#']=(unsigned char) MY_LEX_COMMENT;
103
state_map[(unsigned char)';']=(unsigned char) MY_LEX_SEMICOLON;
104
state_map[(unsigned char)':']=(unsigned char) MY_LEX_SET_VAR;
105
state_map[0]=(unsigned char) MY_LEX_EOL;
106
state_map[(unsigned char)'\\']= (unsigned char) MY_LEX_ESCAPE;
107
state_map[(unsigned char)'/']= (unsigned char) MY_LEX_LONG_COMMENT;
108
state_map[(unsigned char)'*']= (unsigned char) MY_LEX_END_LONG_COMMENT;
109
state_map[(unsigned char)'@']= (unsigned char) MY_LEX_USER_END;
110
state_map[(unsigned char) '`']= (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER;
111
state_map[(unsigned char)'"']= (unsigned char) MY_LEX_STRING_OR_DELIMITER;
114
Create a second map to make it faster to find identifiers
116
for (i=0; i < 256 ; i++)
118
ident_map[i]= (unsigned char) (state_map[i] == MY_LEX_IDENT ||
119
state_map[i] == MY_LEX_NUMBER_IDENT);
122
/*
  ident_map has already been filled at this point, so the four overrides
  below change only state_map and leave ident_map untouched.
*/
/* Special handling of hex and binary strings */
123
state_map[(unsigned char)'x']= state_map[(unsigned char)'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
124
state_map[(unsigned char)'b']= state_map[(unsigned char)'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;
129
/*
  Guards the one-time setup in init_available_charsets(), which sets it to
  true when done. free_charsets() also assigns true to it.
*/
static bool charset_initialized= false;
131
/* Collation table; add_compiled_collation() stores each entry at index cs->number. */
CHARSET_INFO *all_charsets[256];
132
/* Charset used by default; starts out pointing at my_charset_utf8_general_ci. */
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
134
/*
  Register a compiled-in collation.

  Stores cs in all_charsets at index cs->number and sets the
  MY_CS_AVAILABLE bit in cs->state.

  all_charsets has a fixed number of slots, so a collation whose number does
  not fit is rejected (assert in debug builds, ignored otherwise) instead of
  writing past the end of the table.

  @param cs  Collation to register; cs->number selects the table slot.
*/
void add_compiled_collation(CHARSET_INFO * cs)
{
  const bool number_in_range= cs->number < array_elements(all_charsets);

  assert(number_in_range);
  if (!number_in_range)
    return;

  all_charsets[cs->number]= cs;
  cs->state|= MY_CS_AVAILABLE;
}
140
void *cs_alloc(size_t size)
142
void *ptr= malloc(size);
144
memory_vector.push_back(ptr);
151
/*
  One-time setup of the all_charsets table.

  While charset_initialized is still false, the table is zeroed,
  init_compiled_charsets(myflags) is run, init_state_maps() is called on
  entries of the table, and charset_initialized is set to true. The
  function asserts that the flag is true before it finishes.

  NOTE(review): parts of this function are not visible in this view (the
  declaration of cs, the loop increment, the per-entry condition around
  init_state_maps(), the handling of its failure, the return statement,
  all braces), and the bare numeric lines look like extraction residue.
  Confirm against the pristine file before changing the logic.
*/
static bool init_available_charsets(myf myflags)
155
We have to use charset_initialized to not lock on THR_LOCK_charset
156
inside get_internal_charset...
158
if (charset_initialized == false)
161
/* Start from an empty table; the compiled-in collations are registered next. */
memset(&all_charsets, 0, sizeof(all_charsets));
162
init_compiled_charsets(myflags);
164
/* Copy compiled charsets */
165
for (cs=all_charsets;
166
cs < all_charsets+array_elements(all_charsets)-1 ;
172
if (init_state_maps(*cs))
177
charset_initialized= true;
179
assert(charset_initialized);
185
/*
  Empty memory_vector, taking the recorded pointers off the back one at a
  time, then clear the vector.

  NOTE(review): the statement that releases each ptr and the braces are not
  visible in this view, and the bare numeric lines look like extraction
  residue. Confirm against the pristine file.
*/
void free_charsets(void)
187
/*
  NOTE(review): the flag is assigned true here, not false. Because
  init_available_charsets() only does its work while the flag is false, a
  call made after free_charsets() will not rebuild the tables. Confirm that
  this is intended.
*/
charset_initialized= true;
189
while (memory_vector.empty() == false)
191
void *ptr= memory_vector.back();
192
memory_vector.pop_back();
195
memory_vector.clear();
200
uint32_t get_collation_number(const char *name)
202
init_available_charsets(MYF(0));
203
return get_collation_number_internal(name);
207
/*
  Find the number of a collation from its character set name.

  Runs init_available_charsets(MYF(0)), then scans all_charsets for an entry
  that has a csname, has at least one of the cs_flags bits set in its state,
  and whose csname matches charset_name under my_strcasecmp() with
  my_charset_utf8_general_ci. The number of the matching entry is returned.

  NOTE(review): the declaration of cs, the loop increment, the return used
  when nothing matches and the braces are not visible in this view, and the
  bare numeric lines look like extraction residue. Confirm against the
  pristine file before changing the logic.
*/
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags)
210
init_available_charsets(MYF(0));
212
for (cs= all_charsets;
213
cs < all_charsets+array_elements(all_charsets)-1 ;
216
/* cs_flags acts as a mask: an entry qualifies when any requested bit is set. */
if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
217
!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name))
218
return cs[0]->number;
224
const char *get_charset_name(uint32_t charset_number)
226
const CHARSET_INFO *cs;
227
init_available_charsets(MYF(0));
229
cs=all_charsets[charset_number];
230
if (cs && (cs->number == charset_number) && cs->name )
231
return (char*) cs->name;
233
return (char*) "?"; /* this mimics find_type() */
237
/*
  Fetch the all_charsets entry for cs_number and prepare it for use.

  Visible steps:
    - an entry that is neither MY_CS_COMPILED nor MY_CS_LOADED gets extra
      handling (the body of that branch is not visible in this view);
    - the entry is kept only when MY_CS_AVAILABLE is set, otherwise the
      result becomes NULL;
    - an entry that is not yet MY_CS_READY has its cset->init and coll->init
      hooks called (when set) with cs_alloc as the allocator, and
      MY_CS_READY is set in its state.

  cs_number is used as an index without a range check here.

  NOTE(review): the declaration of cs, what happens when an init hook
  returns non-zero, the return statement and the braces are not visible in
  this view, and the bare numeric lines look like extraction residue.
  Confirm against the pristine file before changing the logic.
*/
static const CHARSET_INFO *get_internal_charset(uint32_t cs_number)
241
To make things thread safe we are not allowing other threads to interfere
242
while we may changing the cs_info_table
244
if ((cs= all_charsets[cs_number]))
246
if (!(cs->state & MY_CS_COMPILED) && !(cs->state & MY_CS_LOADED))
250
cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL;
252
if (cs && !(cs->state & MY_CS_READY))
254
/* Each hook is optional; a non-zero result from either one satisfies this condition. */
if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
255
(cs->coll->init && cs->coll->init(cs, cs_alloc)))
258
cs->state|= MY_CS_READY;
265
/*
  Look up a collation by number.

  A request for the number of default_charset_info is answered directly,
  before any initialization. Otherwise init_available_charsets(MYF(0)) is
  run, the number is checked against 0 and against
  array_elements(all_charsets)-1, and the lookup is delegated to
  get_internal_charset().

  NOTE(review): the statement executed when the range check fails, the
  final return and the braces are not visible in this view, and the bare
  numeric lines look like extraction residue. Confirm against the pristine
  file before changing the logic.
*/
const CHARSET_INFO *get_charset(uint32_t cs_number)
267
const CHARSET_INFO *cs;
268
/* Shortcut: no table access is needed for the default charset. */
if (cs_number == default_charset_info->number)
269
return default_charset_info;
271
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
273
/* Number 0 and numbers at or beyond the last table slot are not looked up. */
if (!cs_number || cs_number >= array_elements(all_charsets)-1)
276
cs= get_internal_charset(cs_number);
281
/*
  Look up a collation by its collation name.

  The name is turned into a number with get_collation_number(); a non-zero
  number is passed to get_internal_charset(), while 0 makes cs NULL.

  NOTE(review): the declaration of cs_number, the final return and the
  braces are not visible in this view, and the bare numeric lines look like
  extraction residue. Confirm against the pristine file.
*/
const CHARSET_INFO *get_charset_by_name(const char *cs_name)
284
const CHARSET_INFO *cs;
285
/* get_collation_number() runs the same initialization again itself. */
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
287
cs_number= get_collation_number(cs_name);
288
cs= cs_number ? get_internal_charset(cs_number) : NULL;
294
/*
  Look up a collation by character set name and state flags.

  The name and flags are turned into a number with get_charset_number(); a
  non-zero number is passed to get_internal_charset(), while 0 makes cs
  NULL.

  NOTE(review): the declaration of cs_number, the final return and the
  braces are not visible in this view, and the bare numeric lines look like
  extraction residue. Confirm against the pristine file.
*/
const CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint32_t cs_flags)
297
const CHARSET_INFO *cs;
299
/* get_charset_number() runs the same initialization again itself. */
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
301
cs_number= get_charset_number(cs_name, cs_flags);
302
cs= cs_number ? get_internal_charset(cs_number) : NULL;
309
/*
  Escape apostrophes by doubling them up

  SYNOPSIS
    escape_quotes_for_drizzle()
    charset_info        Charset of the strings
    to                  Buffer for escaped string
    to_length           Length of destination buffer, or 0
    from                The string to escape
    length              The length of the string to escape

  DESCRIPTION
    This escapes the contents of a string by doubling up any apostrophes that
    it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
    effect on the server.

  NOTE
    To be consistent with escape_string_for_mysql(), to_length may be 0 to
    mean "big enough"; the destination is then assumed to have room for
    2*length bytes.

  RETURN VALUES
    UINT32_MAX          The escaped string did not fit in the to buffer
    >=0                 The length of the escaped string
*/
333
/*
  Copy from[0..length) into to, escaping as it goes, and report the number
  of bytes written or UINT32_MAX when the overflow flag was raised.

  NOTE(review): most of the loop body (the declaration of tmp_length, the
  multi-byte copy, the apostrophe handling, the places where overflow is
  set, all braces) is not visible in this view, and the bare numeric lines
  look like extraction residue. Confirm against the pristine file before
  changing the logic.
*/
size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info,
334
char *to, size_t to_length,
335
const char *from, size_t length)
337
const char *to_start= to;
338
/*
  Write limit: a non-zero to_length holds one byte back (to_length-1),
  presumably for a terminator -- confirm. A to_length of 0 puts the limit
  2*length bytes past the start of to.
*/
const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
339
bool overflow= false;
340
bool use_mb_flag= use_mb(charset_info);
341
for (end= from + length; from < end; from++)
344
/* A non-zero result from my_ismbchar() is kept in tmp_length and used as a byte count below. */
if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
346
if (to + tmp_length > to_end)
357
We don't have the same issue here with a non-multi-byte character being
358
turned into a multi-byte character by the addition of an escaping
359
character, because we are only escaping the ' character with itself.
382
/*
  NOTE(review): the declared return type is size_t, but both results are
  32-bit values (the UINT32_MAX sentinel and a length cast to uint32_t).
  Callers have to test for UINT32_MAX, not for (size_t) -1.
*/
return overflow ? UINT32_MAX : (uint32_t) (to - to_start);
385
} /* namespace drizzled */