1
/* Copyright (C) 2000 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
/* This file is originally from the mysql distribution. Coded by monty */
20
#include "drizzled/internal/my_sys.h"
21
#include "drizzled/internal/m_string.h"
22
#include "drizzled/charset.h"
23
#include "drizzled/global_charset_info.h"
27
#include "drizzled/sql_string.h"
34
// Conversion functions to and from std::string.
36
// Builds a std::string from the s.length() bytes starting at s.ptr().
// The explicit length means embedded 0 bytes are carried over as well.
std::string String_to_std_string(String const& s)
38
return std::string(s.ptr(), s.length());
41
// Loads the contents of cs into *s through String::set_ascii().
// NOTE(review): set_ascii() calls set(str, ...) when the charset has
// mbminlen == 1, which looks like it points *s at cs's buffer rather than
// copying it -- confirm that cs outlives *s at the call sites.
String* set_String_from_std_string(String* s, std::string const& cs)
43
s->set_ascii(cs.c_str(), cs.length());
48
/*****************************************************************************
50
*****************************************************************************/
57
str_charset(&my_charset_bin)
61
// Constructs a String tagged with the binary charset and asks real_alloc()
// for a buffer of length_arg bytes.
String::String(size_t length_arg)
66
str_charset(&my_charset_bin)
68
// The allocation result is deliberately discarded by the (void) cast.
(void) real_alloc(length_arg);
71
// Wraps an existing 0-terminated buffer: Ptr is set to str itself (constness
// cast away) and the length is taken from strlen(), so str must not be NULL.
String::String(const char *str, const CHARSET_INFO * const cs)
72
: Ptr(const_cast<char *>(str)),
73
str_length(static_cast<size_t>(strlen(str))),
80
// Wraps an existing buffer of explicit length: Ptr is set to str itself
// (constness cast away); no copy of the bytes is made on this line.
String::String(const char *str, size_t len, const CHARSET_INFO * const cs)
81
: Ptr(const_cast<char *>(str)),
89
String::String(char *str, size_t len, const CHARSET_INFO * const cs)
98
// Copy constructor: length, allocated size and charset are taken over
// member by member from str.
// NOTE(review): the initialisers for Ptr and alloced are not visible in this
// listing -- confirm whether the buffer is shared or owned after the copy.
String::String(const String &str)
100
str_length(str.str_length),
101
Alloced_length(str.Alloced_length),
103
str_charset(str.str_charset)
107
// Allocation function taking a memory::Root: the storage for the String
// object is obtained from mem_root->alloc_root(size).
void *String::operator new(size_t size, memory::Root *mem_root)
109
return mem_root->alloc_root(static_cast<size_t>(size));
112
// Destructor: all cleanup is delegated to the member free().
String::~String()
{
  free();
}
114
// Obtains a buffer for arg_length bytes plus one extra byte, rounded up with
// ALIGN_SIZE, when the current allocation is smaller than that.
bool String::real_alloc(size_t arg_length)
116
arg_length=ALIGN_SIZE(arg_length+1);
118
// Only allocate when the current buffer is too small for the rounded size.
if (Alloced_length < arg_length)
120
// An existing allocation is dealt with first (the guarded statement is not
// visible in this listing).
if (Alloced_length > 0)
122
// malloc failure is detected here.
if (!(Ptr=(char*) malloc(arg_length)))
124
Alloced_length=arg_length;
133
** Check that string is big enough. Set string[alloc_length] to 0
137
// Grows the buffer so that alloc_length bytes plus a terminator fit, then
// writes a 0 byte at Ptr[alloc_length]. The visible error paths return true.
bool String::realloc(size_t alloc_length)
139
size_t len=ALIGN_SIZE(alloc_length+1);
140
// Nothing to grow when the current allocation already covers len.
if (Alloced_length < len)
145
// First path: let the C library ::realloc resize the current buffer.
if ((new_ptr= (char*) ::realloc(Ptr,len)))
151
return true; // Signal error
153
// Second path: take a fresh block and copy the current contents into it.
else if ((new_ptr= (char*) malloc(len)))
155
if (str_length) // Avoid bugs in memcpy on AIX
156
memcpy(new_ptr,Ptr,str_length);
157
// Keep the copied data 0-terminated.
new_ptr[str_length]=0;
163
return true; // Signal error
165
Ptr[alloc_length]=0; // This makes other funcs shorter
169
// Formats num as text in charset cs using the charset handler
// int64_t10_to_str and records the produced length in str_length.
bool String::set_int(int64_t num, bool unsigned_flag, const CHARSET_INFO * const cs)
171
// Size handed to the converter: 20 characters of up to mbmaxlen bytes each,
// plus one.
size_t l=20*cs->mbmaxlen+1;
172
// 10 when unsigned_flag is set, otherwise -10.
// NOTE(review): presumably the negative base selects signed formatting --
// confirm against the int64_t10_to_str contract.
int base= unsigned_flag ? 10 : -10;
176
str_length=(size_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
181
// Formats num into a local buffer and stores the text through the
// five-argument copy(), passing utf8_general_ci as the source charset and cs
// as the target.
bool String::set_real(double num,size_t decimals, const CHARSET_INFO * const cs)
183
char buff[FLOATING_POINT_BUFFER];
188
// decimals at or above NOT_FIXED_DEC: format with my_gcvt, limited to the
// buffer size minus one.
if (decimals >= NOT_FIXED_DEC)
190
len= internal::my_gcvt(num,
191
internal::MY_GCVT_ARG_DOUBLE,
192
sizeof(buff) - 1, buff, NULL);
193
return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
195
// Otherwise format with my_fcvt using the requested number of decimals.
len= internal::my_fcvt(num, decimals, buff, NULL);
196
return copy(buff, (size_t) len, &my_charset_utf8_general_ci, cs,
205
Alloced_length=0; // Force realloc
206
return realloc(str_length);
211
// Copies the bytes, length and charset of str into this object after
// requesting room for str.str_length bytes via alloc().
bool String::copy(const String &str)
213
// alloc() returning non-zero is the failure case being tested here.
if (alloc(str.str_length))
215
str_length=str.str_length;
216
memmove(Ptr, str.Ptr, str_length); // May be overlapping
218
str_charset=str.str_charset;
222
// Copies arg_length bytes from str into this object's buffer after
// requesting room via alloc().
bool String::copy(const char *str,size_t arg_length, const CHARSET_INFO * const cs)
224
// alloc() returning non-zero is the failure case being tested here.
if (alloc(arg_length))
226
// Assignment inside the condition: str_length is updated, and memcpy is
// skipped when the new length is 0.
if ((str_length=arg_length))
227
memcpy(Ptr,str,arg_length);
234
Checks that the source string can be just copied to the destination string
240
arg_length Length of string to copy.
241
from_cs Character set to copy from
242
to_cs Character set to copy to
243
size_t *offset Returns number of unaligned characters.
246
0 No conversion needed
247
1 Either character set conversion or adding leading zeros
248
(e.g. for UCS-2) must be done
251
to_cs may be NULL for "no conversion" if the system variable
252
character_set_results is NULL.
255
// Decides whether a string of arg_length bytes in from_cs can be used as-is
// for to_cs. Only part of the parameter list and of the condition is visible
// in this listing; the remaining lines are missing.
bool String::needs_conversion(size_t arg_length,
256
const CHARSET_INFO * const from_cs,
257
const CHARSET_INFO * const to_cs,
262
// Visible alternatives of the test: binary target, identical charset
// object, or charsets that my_charset_same() reports as the same.
(to_cs == &my_charset_bin) ||
263
(to_cs == from_cs) ||
264
my_charset_same(from_cs, to_cs) ||
265
// Binary source: the remainder arg_length % to_cs->mbminlen is stored in
// *offset, and this alternative holds only when that remainder is 0.
((from_cs == &my_charset_bin) &&
266
(!(*offset=(arg_length % to_cs->mbminlen)))))
274
// Points this String at str via set(), after asserting that arg_length is a
// whole multiple of cs->mbminlen.
bool String::set_or_copy_aligned(const char *str,size_t arg_length,
275
const CHARSET_INFO * const cs)
277
/* How many bytes are in incomplete character */
278
size_t offset= (arg_length % cs->mbminlen);
280
assert(!offset); /* All characters are complete, just copy */
282
set(str, arg_length, cs);
286
/* Copy with charset conversion */
288
// Five-argument copy. The source charset parameter is unnamed and unused in
// the visible code: the bytes are handed unchanged to the three-argument
// copy() and tagged with to_cs.
// NOTE(review): no recoding is visible here, and what happens to *errors is
// not shown in this listing.
bool String::copy(const char *str, size_t arg_length,
289
const CHARSET_INFO * const,
290
const CHARSET_INFO * const to_cs, size_t *errors)
293
return copy(str, arg_length, to_cs);
298
Set a string to the value of a latin1-string, keeping the original charset
302
str String of a simple charset (latin1)
303
arg_length Length of string
306
If string object is of a simple character set, set it to point to the
308
If not, make a copy and convert it to the new character set.
312
1 Could not allocate result buffer
316
// Sets this String from str while keeping its current charset.
bool String::set_ascii(const char *str, size_t arg_length)
318
// Charset whose minimum character width is one byte: hand str straight to
// set() with the existing charset.
if (str_charset->mbminlen == 1)
320
set(str, arg_length, str_charset);
324
// Otherwise go through the five-argument copy(), naming utf8_general_ci as
// the source charset and the current charset as the target.
return copy(str, arg_length, &my_charset_utf8_general_ci, str_charset, &dummy_errors);
327
// Appends the bytes of s to this string.
bool String::append(const String &s)
331
// Make room for the combined length; a non-zero result from realloc() is
// the failure case being tested.
if (realloc(str_length+s.length()))
333
memcpy(Ptr+str_length,s.ptr(),s.length());
334
str_length+=s.length();
341
Append an ASCII string to a string of the current character set
344
// Appends arg_length bytes from s to the end of this string.
bool String::append(const char *s,size_t arg_length)
350
For an ASCII compatinble string we can just append.
352
// Make room for the combined length; a non-zero result from realloc() is
// the failure case being tested.
if (realloc(str_length+arg_length))
354
memcpy(Ptr+str_length,s,arg_length);
355
str_length+=arg_length;
361
Append a 0-terminated ASCII string
364
// Appends a 0-terminated string: the length comes from strlen(), so s must
// not be NULL.
bool String::append(const char *s)
366
return append(s, strlen(s));
371
Append a string in the given charset to the string
372
with character set recoding
375
// Appends arg_length bytes from s. The charset parameter is unnamed and
// unused in the visible code, so the bytes are appended without recoding.
bool String::append(const char *s,size_t arg_length, const CHARSET_INFO * const)
377
// A non-zero result from realloc() is the failure case being tested.
if (realloc(str_length + arg_length))
379
memcpy(Ptr + str_length, s, arg_length);
380
str_length+= arg_length;
386
// Appends s preceded by fill_char padding of full_length - arg_length bytes.
bool String::append_with_prefill(const char *s,size_t arg_length,
387
size_t full_length, char fill_char)
389
// Larger of the two lengths: the space that must be available.
// NOTE(review): t_length is int while both operands are size_t, so values
// above INT_MAX would be truncated.
int t_length= arg_length > full_length ? arg_length : full_length;
391
if (realloc(str_length + t_length))
393
// Number of fill bytes. The subtraction is done in size_t and then stored
// in an int.
// NOTE(review): any guard for arg_length >= full_length is not visible in
// this listing.
t_length= full_length - arg_length;
396
memset(Ptr+str_length, fill_char, t_length);
397
str_length=str_length + t_length;
399
append(s, arg_length);
403
// Returns what the charset's numchars handler reports for the byte range
// [Ptr, Ptr+str_length).
size_t String::numchars()
405
return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
408
// Forwards to the charset's charpos handler for position i, scanning the
// byte range [Ptr+offset, Ptr+str_length).
int String::charpos(int i,size_t offset)
412
return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
415
// Forward byte-wise search for s starting at byte `offset`. The visible
// returns yield a byte index into this string.
int String::strstr(const String &s,size_t offset)
417
// s has to fit between offset and the end of this string.
if (s.length()+offset <= str_length)
420
return ((int) offset); // Empty string is always found
422
register const char *str = Ptr+offset;
423
register const char *search=s.ptr();
424
// One past the last start position at which s still fits.
const char *end=Ptr+str_length-s.length()+1;
425
const char *search_end=s.ptr()+s.length();
429
// First byte matches; note that str has already moved past it.
if (*str++ == *search)
432
// Compare the remaining bytes of s against the bytes following the match.
i=(char*) str; j=(char*) search+1;
433
while (j != search_end)
434
if (*i++ != *j++) goto skip;
435
// -1 undoes the post-increment from the first-byte test.
return (int) (str-Ptr) -1;
443
** Search string from end. Offset is offset to the end of string
446
// Backward byte-wise search for s, looking at the bytes before `offset`.
// The visible returns yield a byte index into this string.
int String::strrstr(const String &s,size_t offset)
448
// s must fit in the first `offset` bytes and offset must lie inside the
// string.
if (s.length() <= offset && offset <= str_length)
451
return offset; // Empty string is always found
452
// Start at the byte just before offset and at the last byte of s.
register const char *str = Ptr+offset-1;
453
register const char *search=s.ptr()+s.length()-1;
455
// Lower bound for the backward scan.
const char *end=Ptr+s.length()-2;
456
// One before the first byte of s: terminates the backward compare.
const char *search_end=s.ptr()-1;
460
// Last byte matches; note that str has already moved one byte back.
if (*str-- == *search)
463
i=(char*) str; j=(char*) search-1;
464
while (j != search_end)
465
if (*i-- != *j--) goto skip;
466
// i stopped one byte before the match start, hence the +1.
return (int) (i-Ptr) +1;
474
Replace substring with string
475
If wrong parameter or not enough memory, do nothing
478
// Convenience overload: forwards to the pointer/length replace() using the
// bytes and length of `to`.
bool String::replace(size_t offset,size_t arg_length,const String &to)
480
return replace(offset,arg_length,to.ptr(),to.length());
483
// Replaces arg_length bytes at `offset` with the to_length bytes at `to`,
// shifting the tail of the string as needed.
// NOTE(review): the branch conditions on diff are not visible in this
// listing; the two paths below appear to be the shrinking and the growing
// case respectively.
bool String::replace(size_t offset,size_t arg_length,
484
const char *to, size_t to_length)
486
// Signed change in total length caused by the replacement.
long diff = (long) to_length-(long) arg_length;
487
// The replaced range must lie inside the current string.
if (offset+arg_length <= str_length)
492
// Write the replacement, then pull the tail down behind it.
memcpy(Ptr+offset,to,to_length);
493
memmove(Ptr+offset+to_length, Ptr+offset+arg_length,
494
str_length-offset-arg_length);
500
// Grow the buffer, then move the tail up by diff bytes with bmove_upp,
// which is given the end addresses of destination and source.
if (realloc(str_length+(size_t) diff))
502
internal::bmove_upp((unsigned char*) Ptr+str_length+diff,
503
(unsigned char*) Ptr+str_length,
504
str_length-offset-arg_length);
507
memcpy(Ptr+offset,to,to_length);
509
str_length+=(size_t) diff;
517
Compare strings according to collation, without end space.
526
Normally this is case sensitive comparison
535
// Compares s and t with the collation handler strnncollsp of cs, passing 0
// as the final argument, and returns that handler's result.
int sortcmp(const String *s,const String *t, const CHARSET_INFO * const cs)
537
return cs->coll->strnncollsp(cs,
538
(unsigned char *) s->ptr(),s->length(),
539
(unsigned char *) t->ptr(),t->length(), 0);
544
Compare strings byte by byte. End spaces are also compared.
552
Strings are compared as a stream of unsigned chars
561
// Byte-wise comparison: memcmp over the common prefix decides; when the
// prefixes are equal the length difference is returned.
int stringcmp(const String *s,const String *t)
563
size_t s_len= s->length(), t_len= t->length(), len= min(s_len,t_len);
564
int cmp= memcmp(s->ptr(), t->ptr(), len);
565
// NOTE(review): s_len - t_len is computed in size_t and then cast to int,
// so the sign is only reliable while the lengths differ by less than
// INT_MAX.
return (cmp) ? cmp : (int) (s_len - t_len);
569
// Chooses between reusing `from` and copying up to from_length bytes of it
// into `to`. Returns `from` on the visible error path.
String *copy_if_not_alloced(String *to,String *from,size_t from_length)
571
// from's buffer is already at least from_length bytes.
if (from->Alloced_length >= from_length)
573
// Work on `from` itself when it has its alloced flag set, when no
// destination was supplied, or when both arguments are the same object.
if (from->alloced || !to || from == to)
575
// The result of growing `from` is deliberately ignored.
(void) from->realloc(from_length);
578
if (to->realloc(from_length))
579
return from; // Actually an error
580
// Copy at most from_length bytes; memcpy is skipped when that is 0.
if ((to->str_length= min(from->str_length,from_length)))
581
memcpy(to->Ptr,from->Ptr,to->str_length);
582
to->str_charset=from->str_charset;
587
/****************************************************************************
589
****************************************************************************/
593
with optional character set conversion,
594
with optional left padding (for binary -> UCS2 conversion)
597
well_formed_copy_nchars()
599
to_length Maximum length of "to" string
600
to_cs Character set of "to" string
602
from_length Length of from string
603
from_cs From character set
604
nchars Copy not more than nchars characters
605
well_formed_error_pos Return position when "from" is not well formed
607
cannot_convert_error_pos Return position where a non-convertible
608
character was met, or NULL otherwise.
609
from_end_pos Return position where scanning of "from"
614
length of bytes copied to 'to'
619
// Copies bytes from `from` to `to` without recoding, bounded by to_length,
// from_length and nchars, and reports positions through the three output
// pointers. Several lines (return type, one parameter, braces and returns)
// are missing from this listing.
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
620
char *to, size_t to_length,
621
const CHARSET_INFO * const from_cs,
622
const char *from, size_t from_length,
624
const char **well_formed_error_pos,
625
const char **cannot_convert_error_pos,
626
const char **from_end_pos)
630
// Only charset pairs that need no recoding are accepted.
assert((to_cs == &my_charset_bin) ||
631
(from_cs == &my_charset_bin) ||
632
(to_cs == from_cs) ||
633
my_charset_same(from_cs, to_cs));
635
// No room for even one minimum-width character, or nothing requested:
// both error positions are cleared.
if (to_length < to_cs->mbminlen || !nchars)
638
*cannot_convert_error_pos= NULL;
639
*well_formed_error_pos= NULL;
643
// Binary target: plain byte copy limited by the smallest of the three
// bounds; no well-formedness check is made.
if (to_cs == &my_charset_bin)
645
res= min(min(nchars, to_length), from_length);
646
memmove(to, from, res);
647
*from_end_pos= from + res;
648
*well_formed_error_pos= NULL;
649
*cannot_convert_error_pos= NULL;
653
int well_formed_error;
656
// Binary source whose length is not a multiple of the target's minimum
// character width: from_offset holds the remainder.
if ((from_offset= (from_length % to_cs->mbminlen)) &&
657
(from_cs == &my_charset_bin))
660
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
661
INSERT INTO t1 (ucs2_column) VALUES (0x01);
664
// Zero-fill pad_length bytes in front of the first from_offset source
// bytes so that together they occupy mbminlen bytes.
size_t pad_length= to_cs->mbminlen - from_offset;
665
memset(to, 0, pad_length);
666
memmove(to + pad_length, from, from_offset);
669
// Account for the bytes consumed and the mbminlen bytes just written.
from_length-= from_offset;
670
to+= to_cs->mbminlen;
671
to_length-= to_cs->mbminlen;
674
// Never read more source bytes than the destination can take.
set_if_smaller(from_length, to_length);
675
// Ask the target charset how many leading bytes are well formed, limited
// to nchars characters.
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
676
nchars, &well_formed_error);
677
memmove(to, from, res);
678
*from_end_pos= from + res;
679
// The error position is the end of the well-formed prefix, when an error
// was flagged.
*well_formed_error_pos= well_formed_error ? from + res : NULL;
680
*cannot_convert_error_pos= NULL;
682
// Adds mbminlen bytes to the result. NOTE(review): presumably this counts
// the padded leading character; the guarding condition is not visible.
res+= to_cs->mbminlen;
691
// Walks this string byte by byte and appends to *str. The visible append
// calls emit escape sequences: an escaped backslash, and a backslash
// followed by 0, ', n, r or Z. Only the Ctrl-Z case label is visible in this
// listing; the other labels and the default branch are missing.
void String::print(String *str)
693
char *st= (char*)Ptr, *end= st+str_length;
694
for (; st < end; st++)
696
unsigned char c= *st;
700
// sizeof(literal)-1 is the literal's length without its terminating 0.
str->append("\\\\", sizeof("\\\\")-1);
703
str->append("\\0", sizeof("\\0")-1);
706
str->append("\\'", sizeof("\\'")-1);
709
str->append("\\n", sizeof("\\n")-1);
712
str->append("\\r", sizeof("\\r")-1);
714
case '\032': // Ctrl-Z
715
str->append("\\Z", sizeof("\\Z")-1);
724
Quote the given identifier.
725
If the given identifier is empty, it will be quoted.
729
name the identifier to be appended
730
name_length length of the appending identifier
733
/* Factor the extern out */
734
extern const CHARSET_INFO *system_charset_info, *files_charset_info;
736
// Appends the identifier `name` (in_length bytes) to this string wrapped in
// quote_char, doubling every occurrence of the quote character inside it.
// The name is walked one character at a time, with the character width
// taken from my_mbcharlen() in system_charset_info.
//
// Fix: the three append() calls for the quote character had been corrupted
// to `append("e_char, ...` (an HTML-entity mis-decoding of `&quote_char`),
// which starts an unterminated string literal. They pass &quote_char again.
void String::append_identifier(const char *name, size_t in_length)
738
const char *name_end;
743
The identifier must be quoted as it includes a quote character or
747
// Room for every byte being doubled plus the two surrounding quotes.
reserve(in_length*2 + 2);
748
quote_char= (char) q;
749
// Opening quote; append() takes a pointer and a length, hence &quote_char.
append(&quote_char, 1, system_charset_info);
751
// in_length is reused as the byte width of the current character and is
// also the loop step.
for (name_end= name+in_length ; name < name_end ; name+= in_length)
753
unsigned char chr= (unsigned char) *name;
754
in_length= my_mbcharlen(system_charset_info, chr);
756
my_mbcharlen can return 0 on a wrong multibyte
757
sequence. It is possible when upgrading from 4.0,
758
and identifier contains some accented characters.
759
The manual says it does not work. So we'll just
760
change length to 1 not to hang in the endless loop.
764
// A single-byte character equal to the quote is written twice.
if (in_length == 1 && chr == (unsigned char) quote_char)
765
append(&quote_char, 1, system_charset_info);
766
append(name, in_length, system_charset_info);
768
// Closing quote.
append(&quote_char, 1, system_charset_info);
773
Exchange state of this object and argument.
779
Target string will contain state of this object and vice versa.
782
// Exchanges the complete state of this object with s, member by member:
// buffer pointer, length, allocated size, alloced flag and charset.
void String::swap(String &s)
784
std::swap(Ptr, s.Ptr);
785
std::swap(str_length, s.str_length);
786
std::swap(Alloced_length, s.Alloced_length);
787
std::swap(alloced, s.alloced);
788
std::swap(str_charset, s.str_charset);
791
// Stores n with int8store at the current end of the string.
// NOTE(review): no capacity check and no str_length update are visible in
// this listing -- presumably the caller reserves space first; confirm.
void String::q_append(const size_t n)
793
int8store(Ptr + str_length, n);
796
// Stores d with float8store at the current end of the string.
// NOTE(review): no capacity check and no str_length update are visible in
// this listing -- presumably the caller reserves space first; confirm.
void String::q_append(double d)
798
float8store(Ptr + str_length, d);
801
// Pointer variant: stores *d with float8store at the current end of the
// string, so d must not be NULL.
// NOTE(review): no capacity check and no str_length update are visible in
// this listing -- presumably the caller reserves space first; confirm.
void String::q_append(double *d)
803
float8store(Ptr + str_length, *d);
806
// Copies data_len bytes to the current end of the string and advances
// str_length. No capacity check is made in the visible code.
void String::q_append(const char *data, size_t data_len)
808
memcpy(Ptr + str_length, data, data_len);
809
str_length += data_len;
812
// Overwrites the bytes at Ptr + position with value using int8store.
// str_length is not touched in the visible code.
void String::write_at_position(int position, size_t value)
814
int8store(Ptr + position,value);
816
// True when the charset's scan handler, asked for MY_SEQ_SPACES starting at
// str, reports a run that reaches exactly to `end`. The parameter line
// declaring `end` is missing from this listing.
bool check_if_only_end_space(const CHARSET_INFO * const cs, char *str,
819
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
822
} /* namespace drizzled */
824
// Equality of two Strings is defined as stringcmp() returning 0.
bool operator==(const drizzled::String &s1, const drizzled::String &s2)
826
return stringcmp(&s1,&s2) == 0;
829
bool operator!=(const drizzled::String &s1, const drizzled::String &s2)