2
Unix SMB/CIFS implementation.
3
Character set conversion Extensions
4
Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5
Copyright (C) Andrew Tridgell 2001
6
Copyright (C) Simo Sorce 2001
7
Copyright (C) Martin Pool 2003
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 3 of the License, or
12
(at your option) any later version.
14
This program is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
GNU General Public License for more details.
19
You should have received a copy of the GNU General Public License
20
along with this program. If not, see <http://www.gnu.org/licenses/>.
25
/* We can parameterize this if someone complains.... JRA. */
27
char lp_failed_convert_char(void)
35
* @brief Character-set conversion routines built on our iconv.
37
* @note Samba's internal character set (at least in the 3.0 series)
38
* is always the same as the one for the Unix filesystem. It is
39
* <b>not</b> necessarily UTF-8 and may be different on machines that
40
* need i18n filenames to be compatible with Unix software. It does
41
* have to be a superset of ASCII. All multibyte sequences must start
42
* with a byte with the high bit set.
48
static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49
static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50
static bool initialized;
53
* Return the name of a charset to give to iconv().
55
static const char *charset_name(charset_t ch)
57
const char *ret = NULL;
59
if (ch == CH_UTF16LE) ret = "UTF-16LE";
60
else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61
else if (ch == CH_UNIX) ret = lp_unix_charset();
62
else if (ch == CH_DOS) ret = lp_dos_charset();
63
else if (ch == CH_DISPLAY) ret = lp_display_charset();
64
else if (ch == CH_UTF8) ret = "UTF8";
66
#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67
if (ret && !strcmp(ret, "LOCALE")) {
68
const char *ln = NULL;
71
setlocale(LC_ALL, "");
73
ln = nl_langinfo(CODESET);
75
/* Check whether the charset name is supported
77
smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78
if (handle == (smb_iconv_t) -1) {
79
DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
82
DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83
smb_iconv_close(handle);
90
if (!ret || !*ret) ret = "ASCII";
94
void lazy_initialize_conv(void)
104
* Destroy global objects allocated by init_iconv()
106
void gfree_charcnv(void)
110
for (c1=0;c1<NUM_CHARSETS;c1++) {
111
for (c2=0;c2<NUM_CHARSETS;c2++) {
112
if ( conv_handles[c1][c2] ) {
113
smb_iconv_close( conv_handles[c1][c2] );
114
conv_handles[c1][c2] = 0;
122
* Initialize iconv conversion descriptors.
124
* This is called the first time it is needed, and also called again
125
* every time the configuration is reloaded, because the charset or
126
* codepage might have changed.
128
void init_iconv(void)
131
bool did_reload = False;
133
/* so that charset_name() works we need to get the UNIX<->UCS2 going
135
if (!conv_handles[CH_UNIX][CH_UTF16LE])
136
conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
138
if (!conv_handles[CH_UTF16LE][CH_UNIX])
139
conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
141
for (c1=0;c1<NUM_CHARSETS;c1++) {
142
for (c2=0;c2<NUM_CHARSETS;c2++) {
143
const char *n1 = charset_name((charset_t)c1);
144
const char *n2 = charset_name((charset_t)c2);
145
if (conv_handles[c1][c2] &&
146
strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147
strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
152
if (conv_handles[c1][c2])
153
smb_iconv_close(conv_handles[c1][c2]);
155
conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156
if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157
DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158
charset_name((charset_t)c1), charset_name((charset_t)c2)));
159
if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
162
if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
165
DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
167
conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168
if (!conv_handles[c1][c2]) {
169
DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170
smb_panic("init_iconv: conv_handle initialization failed");
177
/* XXX: Does this really get called every time the dos
178
* codepage changes? */
179
/* XXX: Is the did_reload test too strict? */
187
* Convert string from one encoding to another, making error checking etc
188
* Slow path version - uses (slow) iconv.
190
* @param src pointer to source string (multibyte or singlebyte)
191
* @param srclen length of the source string in bytes
192
* @param dest pointer to destination string (multibyte or singlebyte)
193
* @param destlen maximal length allowed for string
194
* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
195
* @returns the number of bytes occupied in the destination
197
* Ensure the srclen contains the terminating zero.
201
static size_t convert_string_internal(charset_t from, charset_t to,
202
void const *src, size_t srclen,
203
void *dest, size_t destlen, bool allow_bad_conv)
207
const char* inbuf = (const char*)src;
208
char* outbuf = (char*)dest;
209
smb_iconv_t descriptor;
211
lazy_initialize_conv();
213
descriptor = conv_handles[from][to];
215
if (srclen == (size_t)-1) {
216
if (from == CH_UTF16LE || from == CH_UTF16BE) {
217
srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
219
srclen = strlen((const char *)src)+1;
224
if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
226
DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
235
retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
236
if(retval==(size_t)-1) {
237
const char *reason="unknown error";
240
reason="Incomplete multibyte sequence";
242
DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
247
reason="No more room";
249
if (from == CH_UNIX) {
250
DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
251
charset_name(from), charset_name(to),
252
(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
254
DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
255
charset_name(from), charset_name(to),
256
(unsigned int)srclen, (unsigned int)destlen));
261
reason="Illegal multibyte sequence";
263
DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270
DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
273
/* smb_panic(reason); */
275
return destlen-o_len;
280
* Conversion not supported. This is actually an error, but there are so
281
* many misconfigured iconv systems and smb.conf's out there we can't just
282
* fail. Do a very bad conversion instead.... JRA.
286
if (o_len == 0 || i_len == 0)
287
return destlen - o_len;
289
if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
290
((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
291
/* Can't convert from utf16 any endian to multibyte.
292
Replace with the default fail char.
295
return destlen - o_len;
297
*outbuf = lp_failed_convert_char();
306
if (o_len == 0 || i_len == 0)
307
return destlen - o_len;
309
/* Keep trying with the next char... */
312
} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
313
/* Can't convert to UTF16LE - just widen by adding the
314
default fail char then zero.
317
return destlen - o_len;
319
outbuf[0] = lp_failed_convert_char();
328
if (o_len == 0 || i_len == 0)
329
return destlen - o_len;
331
/* Keep trying with the next char... */
334
} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
335
to != CH_UTF16LE && to != CH_UTF16BE) {
336
/* Failed multibyte to multibyte. Just copy the default fail char and
338
outbuf[0] = lp_failed_convert_char();
346
if (o_len == 0 || i_len == 0)
347
return destlen - o_len;
349
/* Keep trying with the next char... */
353
/* Keep compiler happy.... */
354
return destlen - o_len;
360
* Convert string from one encoding to another, making error checking etc
361
* Fast path version - handles ASCII first.
363
* @param src pointer to source string (multibyte or singlebyte)
364
* @param srclen length of the source string in bytes, or -1 for nul terminated.
365
* @param dest pointer to destination string (multibyte or singlebyte)
366
* @param destlen maximal length allowed for string - *NEVER* -1.
367
* @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
368
* @returns the number of bytes occupied in the destination
370
* Ensure the srclen contains the terminating zero.
372
* This function has been hand-tuned to provide a fast path.
373
* Don't change unless you really know what you are doing. JRA.
376
size_t convert_string(charset_t from, charset_t to,
377
void const *src, size_t srclen,
378
void *dest, size_t destlen, bool allow_bad_conv)
381
* NB. We deliberately don't do a strlen here if srclen == -1.
382
* This is very expensive over millions of calls and is taken
383
* care of in the slow path in convert_string_internal. JRA.
387
SMB_ASSERT(destlen != (size_t)-1);
393
if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
394
const unsigned char *p = (const unsigned char *)src;
395
unsigned char *q = (unsigned char *)dest;
396
size_t slen = srclen;
397
size_t dlen = destlen;
398
unsigned char lastp = '\0';
401
/* If all characters are ascii, fast path here. */
402
while (slen && dlen) {
403
if ((lastp = *p) <= 0x7f) {
405
if (slen != (size_t)-1) {
413
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
416
size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
417
if (ret == (size_t)-1) {
425
/* Even if we fast path we should note if we ran out of room. */
426
if (((slen != (size_t)-1) && slen) ||
427
((slen == (size_t)-1) && lastp)) {
432
} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
433
const unsigned char *p = (const unsigned char *)src;
434
unsigned char *q = (unsigned char *)dest;
436
size_t slen = srclen;
437
size_t dlen = destlen;
438
unsigned char lastp = '\0';
440
/* If all characters are ascii, fast path here. */
441
while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
442
if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
444
if (slen != (size_t)-1) {
453
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
456
size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
457
if (ret == (size_t)-1) {
465
/* Even if we fast path we should note if we ran out of room. */
466
if (((slen != (size_t)-1) && slen) ||
467
((slen == (size_t)-1) && lastp)) {
472
} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
473
const unsigned char *p = (const unsigned char *)src;
474
unsigned char *q = (unsigned char *)dest;
476
size_t slen = srclen;
477
size_t dlen = destlen;
478
unsigned char lastp = '\0';
480
/* If all characters are ascii, fast path here. */
481
while (slen && (dlen >= 2)) {
482
if ((lastp = *p) <= 0x7F) {
485
if (slen != (size_t)-1) {
493
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
496
size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
497
if (ret == (size_t)-1) {
505
/* Even if we fast path we should note if we ran out of room. */
506
if (((slen != (size_t)-1) && slen) ||
507
((slen == (size_t)-1) && lastp)) {
514
#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
517
return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
521
* Convert between character sets, allocating a new buffer for the result.
523
* @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
524
* (this is a bad interface and needs fixing. JRA).
525
* @param srclen length of source buffer.
526
* @param dest always set at least to NULL
527
* @param converted_size set to the size of the allocated buffer on return
529
* @note -1 is not accepted for srclen.
531
* @return true if new buffer was correctly allocated, and string was
534
* Ensure the srclen contains the terminating zero.
536
* I hate the gotos in this function. It's embarrassing.....
537
* There has to be a cleaner way to do this. JRA.
540
bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
541
void const *src, size_t srclen, void *dst,
542
size_t *converted_size, bool allow_bad_conv)
544
size_t i_len, o_len, destlen = (srclen * 3) / 2;
546
const char *inbuf = (const char *)src;
547
char *outbuf = NULL, *ob = NULL;
548
smb_iconv_t descriptor;
549
void **dest = (void **)dst;
553
if (!converted_size) {
558
if (src == NULL || srclen == (size_t)-1) {
563
ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
573
lazy_initialize_conv();
575
descriptor = conv_handles[from][to];
577
if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
579
DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
586
/* +2 is for ucs2 null termination. */
587
if ((destlen*2)+2 < destlen) {
588
/* wrapped ! abort. */
590
DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
596
destlen = destlen * 2;
599
/* +2 is for ucs2 null termination. */
601
ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
603
ob = (char *)SMB_REALLOC(ob, destlen + 2);
607
DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
617
retval = smb_iconv(descriptor,
620
if(retval == (size_t)-1) {
621
const char *reason="unknown error";
624
reason="Incomplete multibyte sequence";
626
DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
633
reason="Illegal multibyte sequence";
635
DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
641
DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
642
/* smb_panic(reason); */
653
destlen = destlen - o_len;
654
/* Don't shrink unless we're reclaiming a lot of
655
* space. This is in the hot codepath and these
656
* reallocs *cost*. JRA.
659
/* We're shrinking here so we know the +2 is safe from wrap. */
661
ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
663
ob = (char *)SMB_REALLOC(ob,destlen + 2);
667
if (destlen && !ob) {
668
DEBUG(0, ("convert_string_allocate: out of memory!\n"));
675
/* Must ucs2 null terminate in the extra space we allocated. */
677
ob[destlen+1] = '\0';
679
*converted_size = destlen;
685
* Conversion not supported. This is actually an error, but there are so
686
* many misconfigured iconv systems and smb.conf's out there we can't just
687
* fail. Do a very bad conversion instead.... JRA.
691
if (o_len == 0 || i_len == 0)
694
if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
695
((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
696
/* Can't convert from utf16 any endian to multibyte.
697
Replace with the default fail char.
704
*outbuf = lp_failed_convert_char();
713
if (o_len == 0 || i_len == 0)
716
/* Keep trying with the next char... */
719
} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
720
/* Can't convert to UTF16LE - just widen by adding the
721
default fail char then zero.
726
outbuf[0] = lp_failed_convert_char();
735
if (o_len == 0 || i_len == 0)
738
/* Keep trying with the next char... */
741
} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
742
to != CH_UTF16LE && to != CH_UTF16BE) {
743
/* Failed multibyte to multibyte. Just copy the default fail char and
745
outbuf[0] = lp_failed_convert_char();
753
if (o_len == 0 || i_len == 0)
756
/* Keep trying with the next char... */
760
/* Keep compiler happy.... */
767
* Convert between character sets, allocating a new buffer using talloc for the result.
769
* @param srclen length of source buffer.
770
* @param dest always set at least to NULL
771
* @param converted_size set to the number of bytes occupied by the string in
772
* the destination on success.
773
* @note -1 is not accepted for srclen.
775
* @return true if new buffer was correctly allocated, and string was
778
bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
779
void const *src, size_t srclen, void *dst,
780
size_t *converted_size, bool allow_bad_conv)
782
void **dest = (void **)dst;
785
return convert_string_allocate(ctx, from, to, src, srclen, dest,
786
converted_size, allow_bad_conv);
789
size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
794
if (!push_ucs2_allocate(&buffer, src, &size)) {
798
if (!strupper_w(buffer) && (dest == src)) {
803
size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
809
strdup() a unix string to upper case.
812
char *strdup_upper(const char *s)
814
char *out_buffer = SMB_STRDUP(s);
815
const unsigned char *p = (const unsigned char *)s;
816
unsigned char *q = (unsigned char *)out_buffer;
822
/* this is quite a common operation, so we want it to be
823
fast. We optimise for the ascii case, knowing that all our
824
supported multi-byte character sets are ascii-compatible
825
(ie. they match for the first 128 chars) */
830
*q++ = toupper_ascii_fast(*p);
836
size_t converted_size, converted_size2;
837
smb_ucs2_t *buffer = NULL;
839
SAFE_FREE(out_buffer);
840
if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
842
(void **)(void *)&buffer,
843
&converted_size, True))
850
if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
852
(void **)(void *)&out_buffer,
853
&converted_size2, True))
859
/* Don't need the intermediate buffer
869
talloc_strdup() a unix string to upper case.
872
char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
874
char *out_buffer = talloc_strdup(ctx,s);
875
const unsigned char *p = (const unsigned char *)s;
876
unsigned char *q = (unsigned char *)out_buffer;
882
/* this is quite a common operation, so we want it to be
883
fast. We optimise for the ascii case, knowing that all our
884
supported multi-byte character sets are ascii-compatible
885
(ie. they match for the first 128 chars) */
890
*q++ = toupper_ascii_fast(*p);
896
size_t converted_size, converted_size2;
897
smb_ucs2_t *ubuf = NULL;
899
/* We're not using the ascii buffer above. */
900
TALLOC_FREE(out_buffer);
902
if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
903
strlen(s)+1, (void *)&ubuf,
904
&converted_size, True))
911
if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
912
converted_size, (void *)&out_buffer,
913
&converted_size2, True))
919
/* Don't need the intermediate buffer
928
size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
931
smb_ucs2_t *buffer = NULL;
933
if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
934
(void **)(void *)&buffer, &size,
937
smb_panic("failed to create UCS2 buffer");
939
if (!strlower_w(buffer) && (dest == src)) {
943
size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
949
strdup() a unix string to lower case.
952
char *strdup_lower(const char *s)
954
size_t converted_size;
955
smb_ucs2_t *buffer = NULL;
958
if (!push_ucs2_allocate(&buffer, s, &converted_size)) {
964
if (!pull_ucs2_allocate(&out_buffer, buffer, &converted_size)) {
974
char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
976
size_t converted_size;
977
smb_ucs2_t *buffer = NULL;
980
if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
986
if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
997
size_t ucs2_align(const void *base_ptr, const void *p, int flags)
999
if (flags & (STR_NOALIGN|STR_ASCII))
1001
return PTR_DIFF(p, base_ptr) & 1;
1006
* Copy a string from a char* unix src to a dos codepage string destination.
1008
* @return the number of bytes occupied by the string in the destination.
1010
* @param flags can include
1012
* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1013
* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1016
* @param dest_len the maximum length in bytes allowed in the
1019
size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1021
size_t src_len = strlen(src);
1022
char *tmpbuf = NULL;
1025
/* No longer allow a length of -1. */
1026
if (dest_len == (size_t)-1) {
1027
smb_panic("push_ascii - dest_len == -1");
1030
if (flags & STR_UPPER) {
1031
tmpbuf = SMB_STRDUP(src);
1033
smb_panic("malloc fail");
1039
if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1043
ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1044
if (ret == (size_t)-1 &&
1045
(flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1047
((char *)dest)[0] = '\0';
1053
size_t push_ascii_fstring(void *dest, const char *src)
1055
return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1058
/********************************************************************
1059
Push an nstring - ensure null terminated. Written by
1060
moriyama@miraclelinux.com (MORIYAMA Masayuki).
1061
********************************************************************/
1063
size_t push_ascii_nstring(void *dest, const char *src)
1065
size_t i, buffer_len, dest_len;
1069
if (!push_ucs2_allocate(&buffer, src, &buffer_len)) {
1070
smb_panic("failed to create UCS2 buffer");
1073
/* We're using buffer_len below to count ucs2 characters, not bytes. */
1074
buffer_len /= sizeof(smb_ucs2_t);
1077
for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1078
unsigned char mb[10];
1079
/* Convert one smb_ucs2_t character at a time. */
1080
size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1081
if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1082
memcpy((char *)dest + dest_len, mb, mb_len);
1089
((char *)dest)[dest_len] = '\0';
1092
conv_silent = False;
1096
/********************************************************************
1097
Push and malloc an ascii string. src and dest null terminated.
1098
********************************************************************/
1100
bool push_ascii_allocate(char **dest, const char *src, size_t *converted_size)
1102
size_t src_len = strlen(src)+1;
1105
return convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1106
(void **)dest, converted_size, True);
1110
* Copy a string from a dos codepage source to a unix char* destination.
1112
* The resulting string in "dest" is always null terminated.
1114
* @param flags can have:
1116
* <dt>STR_TERMINATE</dt>
1117
* <dd>STR_TERMINATE means the string in @p src
1118
* is null terminated, and src_len is ignored.</dd>
1121
* @param src_len is the length of the source area in bytes.
1122
* @returns the number of bytes occupied by the string in @p src.
1124
size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1128
if (dest_len == (size_t)-1) {
1129
/* No longer allow dest_len of -1. */
1130
smb_panic("pull_ascii - invalid dest_len of -1");
1133
if (flags & STR_TERMINATE) {
1134
if (src_len == (size_t)-1) {
1135
src_len = strlen((const char *)src) + 1;
1137
size_t len = strnlen((const char *)src, src_len);
1144
ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1145
if (ret == (size_t)-1) {
1150
if (dest_len && ret) {
1151
/* Did we already process the terminating zero ? */
1152
if (dest[MIN(ret-1, dest_len-1)] != 0) {
1153
dest[MIN(ret, dest_len-1)] = 0;
1163
* Copy a string from a dos codepage source to a unix char* destination.
1165
Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1168
* The resulting string in "dest" is always null terminated.
1170
* @param flags can have:
1172
* <dt>STR_TERMINATE</dt>
1173
* <dd>STR_TERMINATE means the string in @p src
1174
* is null terminated, and src_len is ignored.</dd>
1177
* @param src_len is the length of the source area in bytes.
1178
* @returns the number of bytes occupied by the string in @p src.
1181
static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1191
/* Ensure we never use the braindead "malloc" variant. */
1193
smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1203
if (flags & STR_TERMINATE) {
1204
if (src_len == (size_t)-1) {
1205
src_len = strlen((const char *)src) + 1;
1207
size_t len = strnlen((const char *)src, src_len);
1212
/* Ensure we don't use an insane length from the client. */
1213
if (src_len >= 1024*1024) {
1214
char *msg = talloc_asprintf(ctx,
1215
"Bad src length (%u) in "
1216
"pull_ascii_base_talloc",
1217
(unsigned int)src_len);
1221
/* Can't have an unlimited length
1222
* non STR_TERMINATE'd.
1224
if (src_len == (size_t)-1) {
1230
/* src_len != -1 here. */
1232
if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1237
if (dest_len && dest) {
1238
/* Did we already process the terminating zero ? */
1239
if (dest[dest_len-1] != 0) {
1240
size_t size = talloc_get_size(dest);
1241
/* Have we got space to append the '\0' ? */
1242
if (size <= dest_len) {
1244
dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1248
dest_len = (size_t)-1;
1253
dest[dest_len] = '\0';
1264
size_t pull_ascii_fstring(char *dest, const void *src)
1266
return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1269
/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1271
size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1273
return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1277
* Copy a string from a char* src to a unicode destination.
1279
* @returns the number of bytes occupied by the string in the destination.
1281
* @param flags can have:
1284
* <dt>STR_TERMINATE <dd>means include the null termination.
1285
* <dt>STR_UPPER <dd>means uppercase in the destination.
1286
* <dt>STR_NOALIGN <dd>means don't do alignment.
1289
* @param dest_len is the maximum length allowed in the
1293
size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1299
if (dest_len == (size_t)-1) {
1300
/* No longer allow dest_len of -1. */
1301
smb_panic("push_ucs2 - invalid dest_len of -1");
1304
if (flags & STR_TERMINATE)
1305
src_len = (size_t)-1;
1307
src_len = strlen(src);
1309
if (ucs2_align(base_ptr, dest, flags)) {
1311
dest = (void *)((char *)dest + 1);
1317
/* ucs2 is always a multiple of 2 bytes */
1320
ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1321
if (ret == (size_t)-1) {
1322
if ((flags & STR_TERMINATE) &&
1332
if (flags & STR_UPPER) {
1333
smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1336
/* We check for i < (ret / 2) below as the dest string isn't null
1337
terminated if STR_TERMINATE isn't set. */
1339
for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1340
smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1341
if (v != dest_ucs2[i]) {
1352
* Copy a string from a unix char* src to a UCS2 destination,
1353
* allocating a buffer using talloc().
1355
* @param dest always set at least to NULL
1356
* @param converted_size set to the number of bytes occupied by the string in
1357
* the destination on success.
1359
* @return true if new buffer was correctly allocated, and string was
1362
bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1363
size_t *converted_size)
1365
size_t src_len = strlen(src)+1;
1368
return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1369
(void **)dest, converted_size, True);
1374
* Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1376
* @param dest always set at least to NULL
1377
* @param converted_size set to the number of bytes occupied by the string in
1378
* the destination on success.
1380
* @return true if new buffer was correctly allocated, and string was
1384
bool push_ucs2_allocate(smb_ucs2_t **dest, const char *src,
1385
size_t *converted_size)
1387
size_t src_len = strlen(src)+1;
1390
return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1391
(void **)dest, converted_size, True);
1395
Copy a string from a char* src to a UTF-8 destination.
1396
Return the number of bytes occupied by the string in the destination
1398
STR_TERMINATE means include the null termination
1399
STR_UPPER means uppercase in the destination
1400
dest_len is the maximum length allowed in the destination. If dest_len
1401
is -1 then no maximum is used.
1404
static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1408
char *tmpbuf = NULL;
1410
if (dest_len == (size_t)-1) {
1411
/* No longer allow dest_len of -1. */
1412
smb_panic("push_utf8 - invalid dest_len of -1");
1415
if (flags & STR_UPPER) {
1416
tmpbuf = strdup_upper(src);
1421
src_len = strlen(src);
1424
src_len = strlen(src);
1425
if (flags & STR_TERMINATE) {
1429
ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1434
size_t push_utf8_fstring(void *dest, const char *src)
1436
return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1440
* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1442
* @param dest always set at least to NULL
1443
* @param converted_size set to the number of bytes occupied by the string in
1444
* the destination on success.
1446
* @return true if new buffer was correctly allocated, and string was
1450
bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1451
size_t *converted_size)
1453
size_t src_len = strlen(src)+1;
1456
return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1457
(void**)dest, converted_size, True);
1461
* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1463
* @param dest always set at least to NULL
1464
* @param converted_size set to the number of bytes occupied by the string in
1465
* the destination on success.
1467
* @return true if new buffer was correctly allocated, and string was
1471
bool push_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1473
size_t src_len = strlen(src)+1;
1476
return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1477
(void **)dest, converted_size, True);
1481
Copy a string from a ucs2 source to a unix char* destination.
1483
STR_TERMINATE means the string in src is null terminated.
1484
STR_NOALIGN means don't try to align.
1485
if STR_TERMINATE is set then src_len is ignored if it is -1.
1486
src_len is the length of the source area in bytes
1487
Return the number of bytes occupied by the string in src.
1488
The resulting string in "dest" is always null terminated.
1491
size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1495
if (dest_len == (size_t)-1) {
1496
/* No longer allow dest_len of -1. */
1497
smb_panic("pull_ucs2 - invalid dest_len of -1");
1501
if (dest && dest_len > 0) {
1507
if (ucs2_align(base_ptr, src, flags)) {
1508
src = (const void *)((const char *)src + 1);
1509
if (src_len != (size_t)-1)
1513
if (flags & STR_TERMINATE) {
1514
/* src_len -1 is the default for null terminated strings. */
1515
if (src_len != (size_t)-1) {
1516
size_t len = strnlen_w((const smb_ucs2_t *)src,
1518
if (len < src_len/2)
1524
/* ucs2 is always a multiple of 2 bytes */
1525
if (src_len != (size_t)-1)
1528
ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1529
if (ret == (size_t)-1) {
1534
if (src_len == (size_t)-1)
1537
if (dest_len && ret) {
1538
/* Did we already process the terminating zero ? */
1539
if (dest[MIN(ret-1, dest_len-1)] != 0) {
1540
dest[MIN(ret, dest_len-1)] = 0;
1550
Copy a string from a ucs2 source to a unix char* destination.
1551
Talloc version with a base pointer.
1552
Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1555
STR_TERMINATE means the string in src is null terminated.
1556
STR_NOALIGN means don't try to align.
1557
if STR_TERMINATE is set then src_len is ignored if it is -1.
1558
src_len is the length of the source area in bytes
1559
Return the number of bytes occupied by the string in src.
1560
The resulting string in "dest" is always null terminated.
1563
size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1564
const void *base_ptr,
1576
/* Ensure we never use the braindead "malloc" variant. */
1578
smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1586
if (ucs2_align(base_ptr, src, flags)) {
1587
src = (const void *)((const char *)src + 1);
1588
if (src_len != (size_t)-1)
1592
if (flags & STR_TERMINATE) {
1593
/* src_len -1 is the default for null terminated strings. */
1594
if (src_len != (size_t)-1) {
1595
size_t len = strnlen_w((const smb_ucs2_t *)src,
1597
if (len < src_len/2)
1602
* src_len == -1 - alloc interface won't take this
1603
* so we must calculate.
1605
src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1607
/* Ensure we don't use an insane length from the client. */
1608
if (src_len >= 1024*1024) {
1609
smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1612
/* Can't have an unlimited length
1613
* non STR_TERMINATE'd.
1615
if (src_len == (size_t)-1) {
1621
/* src_len != -1 here. */
1623
/* ucs2 is always a multiple of 2 bytes */
1626
if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1627
(void *)&dest, &dest_len, True)) {
1632
/* Did we already process the terminating zero ? */
1633
if (dest[dest_len-1] != 0) {
1634
size_t size = talloc_get_size(dest);
1635
/* Have we got space to append the '\0' ? */
1636
if (size <= dest_len) {
1638
dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1642
dest_len = (size_t)-1;
1647
dest[dest_len] = '\0';
1658
size_t pull_ucs2_fstring(char *dest, const void *src)
1660
return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1664
* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1666
* @param dest always set at least to NULL
1667
* @param converted_size set to the number of bytes occupied by the string in
1668
* the destination on success.
1670
* @return true if new buffer was correctly allocated, and string was converted.
1674
bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1675
size_t *converted_size)
1677
size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1680
return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1681
(void **)dest, converted_size, True);
1685
* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1687
* @param dest always set at least to NULL
1688
* @param converted_size set to the number of bytes occupied by the string in
1689
* the destination on success.
1690
* @return true if new buffer was correctly allocated, and string was converted.
1694
bool pull_ucs2_allocate(char **dest, const smb_ucs2_t *src,
1695
size_t *converted_size)
1697
size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1700
return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1701
(void **)dest, converted_size, True);
1705
* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1707
* @param dest always set at least to NULL
1708
* @param converted_size set to the number of bytes occupied by the string in
1709
* the destination on success.
1711
* @return true if new buffer was correctly allocated, and string was converted.
1715
bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1716
size_t *converted_size)
1718
size_t src_len = strlen(src)+1;
1721
return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1722
(void **)dest, converted_size, True);
1726
* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1728
* @param dest always set at least to NULL
1729
* @param converted_size set to the number of bytes occupied by the string in
1730
* the destination on success.
1732
* @return true if new buffer was correctly allocated, and string was converted.
1736
bool pull_utf8_allocate(char **dest, const char *src, size_t *converted_size)
1738
size_t src_len = strlen(src)+1;
1741
return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1742
(void **)dest, converted_size, True);
1746
* Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1748
* @param dest always set at least to NULL
1749
* @param converted_size set to the number of bytes occupied by the string in
1750
* the destination on success.
1752
* @return true if new buffer was correctly allocated, and string was converted.
1756
bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1757
size_t *converted_size)
1759
size_t src_len = strlen(src)+1;
1762
return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1763
(void **)dest, converted_size, True);
1767
Copy a string from a char* src to a unicode or ascii
1768
dos codepage destination choosing unicode or ascii based on the
1769
flags in the SMB buffer starting at base_ptr.
1770
Return the number of bytes occupied by the string in the destination.
1772
STR_TERMINATE means include the null termination.
1773
STR_UPPER means uppercase in the destination.
1774
STR_ASCII use ascii even with unicode packet.
1775
STR_NOALIGN means don't do alignment.
1776
dest_len is the maximum length allowed in the destination. If dest_len
1777
is -1 then no maximum is used.
1780
size_t push_string_fn(const char *function, unsigned int line,
1781
const void *base_ptr, uint16 flags2,
1782
void *dest, const char *src,
1783
size_t dest_len, int flags)
1786
/* We really need to zero fill here, not clobber
1787
* region, as we want to ensure that valgrind thinks
1788
* all of the outgoing buffer has been written to
1789
* so a send() or write() won't trap an error.
1793
clobber_region(function, line, dest, dest_len);
1795
memset(dest, '\0', dest_len);
1799
if (!(flags & STR_ASCII) && \
1800
((flags & STR_UNICODE || \
1801
(flags2 & FLAGS2_UNICODE_STRINGS)))) {
1802
return push_ucs2(base_ptr, dest, src, dest_len, flags);
1804
return push_ascii(dest, src, dest_len, flags);
1809
Copy a string from a unicode or ascii source (depending on
1810
the packet flags) to a char* destination.
1812
STR_TERMINATE means the string in src is null terminated.
1813
STR_UNICODE means to force as unicode.
1814
STR_ASCII use ascii even with unicode packet.
1815
STR_NOALIGN means don't do alignment.
1816
if STR_TERMINATE is set then src_len is ignored if it is -1
1817
src_len is the length of the source area in bytes.
1818
Return the number of bytes occupied by the string in src.
1819
The resulting string in "dest" is always null terminated.
1822
size_t pull_string_fn(const char *function,
1824
const void *base_ptr,
1833
clobber_region(function, line, dest, dest_len);
1836
if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1837
smb_panic("No base ptr to get flg2 and neither ASCII nor "
1841
if (!(flags & STR_ASCII) && \
1842
((flags & STR_UNICODE || \
1843
(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1844
return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1846
return pull_ascii(dest, src, dest_len, src_len, flags);
1850
Copy a string from a unicode or ascii source (depending on
1851
the packet flags) to a char* destination.
1852
Variant that uses talloc.
1854
STR_TERMINATE means the string in src is null terminated.
1855
STR_UNICODE means to force as unicode.
1856
STR_ASCII use ascii even with unicode packet.
1857
STR_NOALIGN means don't do alignment.
1858
if STR_TERMINATE is set then src_len is ignored if it is -1
1859
src_len is the length of the source area in bytes.
1860
Return the number of bytes occupied by the string in src.
1861
The resulting string in "dest" is always null terminated.
1864
size_t pull_string_talloc_fn(const char *function,
1867
const void *base_ptr,
1874
if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1875
smb_panic("No base ptr to get flg2 and neither ASCII nor "
1879
if (!(flags & STR_ASCII) && \
1880
((flags & STR_UNICODE || \
1881
(smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1882
return pull_ucs2_base_talloc(ctx,
1889
return pull_ascii_base_talloc(ctx,
1897
size_t align_string(const void *base_ptr, const char *p, int flags)
1899
if (!(flags & STR_ASCII) && \
1900
((flags & STR_UNICODE || \
1901
(SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1902
return ucs2_align(base_ptr, p, flags);
1908
Return the unicode codepoint for the next multi-byte CH_UNIX character
1909
in the string. The unicode codepoint (codepoint_t) is an unsigned 32-bit value.
1911
Also return the number of bytes consumed (which tells the caller
1912
how many bytes to skip to get to the next CH_UNIX character).
1914
Return INVALID_CODEPOINT if the next character cannot be converted.
1917
codepoint_t next_codepoint(const char *str, size_t *size)
1919
/* It cannot occupy more than 4 bytes in UTF16 format */
1921
smb_iconv_t descriptor;
1927
if ((str[0] & 0x80) == 0) {
1929
return (codepoint_t)str[0];
1932
/* We assume that no multi-byte character can take
1933
more than 5 bytes. This is OK as we only
1934
support codepoints up to 1M */
1936
ilen_orig = strnlen(str, 5);
1939
lazy_initialize_conv();
1941
descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1942
if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1944
return INVALID_CODEPOINT;
1947
/* This looks a little strange, but it is needed to cope
1948
with codepoints above 64k which are encoded as per RFC2781. */
1950
outbuf = (char *)buf;
1951
smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1953
/* We failed to convert to a 2 byte character.
1954
See if we can convert to a 4 byte UTF16-LE char encoding.
1957
outbuf = (char *)buf;
1958
smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1960
/* We didn't convert any bytes */
1962
return INVALID_CODEPOINT;
1969
*size = ilen_orig - ilen;
1972
/* 2 byte, UTF16-LE encoded value. */
1973
return (codepoint_t)SVAL(buf, 0);
1976
/* Decode a 4 byte UTF16-LE character manually.
1977
See RFC2781 for the encoding mechanism.
1979
codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1980
codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1982
return (codepoint_t)0x10000 +
1986
/* no other length is valid */
1987
return INVALID_CODEPOINT;