2
Unix SMB/CIFS implementation.
3
Samba utility functions
4
Copyright (C) Andrew Tridgell 1992-2001
5
Copyright (C) Simo Sorce 2001
7
This program is free software; you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation; either version 3 of the License, or
10
(at your option) any later version.
12
This program is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
GNU General Public License for more details.
17
You should have received a copy of the GNU General Public License
18
along with this program. If not, see <http://www.gnu.org/licenses/>.
22
#include "system/locale.h"
24
struct smb_iconv_convenience *global_iconv_convenience = NULL;
26
static inline struct smb_iconv_convenience *get_iconv_convenience(void)
28
if (global_iconv_convenience == NULL)
29
global_iconv_convenience = smb_iconv_convenience_init(talloc_autofree_context(), "ASCII", "UTF-8", true);
30
return global_iconv_convenience;
34
Case insensitive string comparison
36
_PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
38
codepoint_t c1=0, c2=0;
40
struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
42
/* handle null ptr comparisons to simplify the use in qsort */
43
if (s1 == s2) return 0;
44
if (s1 == NULL) return -1;
45
if (s2 == NULL) return 1;
48
c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
49
c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
58
if (c1 == INVALID_CODEPOINT ||
59
c2 == INVALID_CODEPOINT) {
60
/* what else can we do?? */
61
return strcasecmp(s1, s2);
64
if (toupper_m(c1) != toupper_m(c2)) {
73
* Get the next token from a string, return False if none found.
74
* Handles double-quotes.
76
* Based on a routine by GJC@VILLAGE.COM.
77
* Extensively modified by Andrew.Tridgell@anu.edu.au
79
_PUBLIC_ bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
90
/* default to simple separators */
94
/* find the first non sep char */
95
while (*s && strchr_m(sep,*s))
102
/* copy over the token */
103
for (quoted = false; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
112
*ptr = (*s) ? s+1 : s;
119
Case insensitive string comparison, length limited
121
_PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
123
codepoint_t c1=0, c2=0;
125
struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
127
/* handle null ptr comparisons to simplify the use in qsort */
128
if (s1 == s2) return 0;
129
if (s1 == NULL) return -1;
130
if (s2 == NULL) return 1;
132
while (*s1 && *s2 && n) {
135
c1 = next_codepoint_convenience(iconv_convenience, s1, &size1);
136
c2 = next_codepoint_convenience(iconv_convenience, s2, &size2);
145
if (c1 == INVALID_CODEPOINT ||
146
c2 == INVALID_CODEPOINT) {
147
/* what else can we do?? */
148
return strcasecmp(s1, s2);
151
if (toupper_m(c1) != toupper_m(c2)) {
166
* @note The comparison is case-insensitive.
168
_PUBLIC_ bool strequal_m(const char *s1, const char *s2)
170
return strcasecmp_m(s1,s2) == 0;
174
Compare 2 strings (case sensitive).
176
_PUBLIC_ bool strcsequal_m(const char *s1,const char *s2)
183
return strcmp(s1,s2) == 0;
189
NOTE: oldc and newc must be 7 bit characters
191
_PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
193
struct smb_iconv_convenience *ic = get_iconv_convenience();
196
codepoint_t c = next_codepoint_convenience(ic, s, &size);
205
Paranoid strcpy into a buffer of given length (includes terminating
206
zero). Strips out all but 'a-zA-Z0-9' and the characters in other_safe_chars
207
and replaces with '_'. Deliberately does *NOT* check for multibyte
208
characters. Don't change it !
211
_PUBLIC_ char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
215
if (maxlength == 0) {
216
/* can't fit any bytes at all! */
221
DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
231
if (len >= maxlength)
234
if (!other_safe_chars)
235
other_safe_chars = "";
237
for(i = 0; i < len; i++) {
238
int val = (src[i] & 0xff);
239
if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
251
Count the number of UCS2 characters in a string. Normally this will
252
be the same as the number of bytes in a string for single byte strings,
253
but will be different for multibyte.
255
_PUBLIC_ size_t strlen_m(const char *s)
258
struct smb_iconv_convenience *ic = get_iconv_convenience();
264
while (*s && !(((uint8_t)*s) & 0x80)) {
275
codepoint_t c = next_codepoint_convenience(ic, s, &c_size);
288
Work out the number of multibyte chars in a string, including the NULL
291
_PUBLIC_ size_t strlen_m_term(const char *s)
297
return strlen_m(s) + 1;
301
* Weird helper routine for the winreg pipe: If nothing is around, return 0,
302
* if a string is there, include the terminator.
305
_PUBLIC_ size_t strlen_m_term_null(const char *s)
320
Strchr and strrchr_m are a bit complex on general multi-byte strings.
322
_PUBLIC_ char *strchr_m(const char *s, char c)
324
struct smb_iconv_convenience *ic = get_iconv_convenience();
328
/* characters below 0x40 are guaranteed to not appear in
329
non-initial position in multi-byte charsets */
330
if ((c & 0xC0) == 0) {
336
codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
338
return discard_const_p(char, s);
347
* Multibyte-character version of strrchr
349
_PUBLIC_ char *strrchr_m(const char *s, char c)
351
struct smb_iconv_convenience *ic = get_iconv_convenience();
358
/* characters below 0x40 are guaranteed to not appear in
359
non-initial position in multi-byte charsets */
360
if ((c & 0xC0) == 0) {
361
return strrchr(s, c);
366
codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
368
ret = discard_const_p(char, s);
377
return True if any (multi-byte) character is lower case
379
_PUBLIC_ bool strhaslower(const char *string)
381
struct smb_iconv_convenience *ic = get_iconv_convenience();
387
s = next_codepoint_convenience(ic, string, &c_size);
393
return true; /* that means it has lower case chars */
401
return True if any (multi-byte) character is upper case
403
_PUBLIC_ bool strhasupper(const char *string)
405
struct smb_iconv_convenience *ic = get_iconv_convenience();
411
s = next_codepoint_convenience(ic, string, &c_size);
417
return true; /* that means it has upper case chars */
425
Convert a string to lower case, allocated with talloc
427
_PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
431
struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
433
/* this takes advantage of the fact that upper/lower can't
434
change the length of a character by more than 1 byte */
435
dest = talloc_array(ctx, char, 2*(strlen(src))+1);
442
codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
447
c_size = push_codepoint(iconv_convenience, dest+size, c);
457
/* trim it so talloc_append_string() works */
458
dest = talloc_realloc(ctx, dest, char, size+1);
460
talloc_set_name_const(dest, dest);
466
Convert a string to UPPER case, allocated with talloc
467
source length limited to n bytes
469
_PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
473
struct smb_iconv_convenience *iconv_convenience = get_iconv_convenience();
479
/* this takes advantage of the fact that upper/lower can't
480
change the length of a character by more than 1 byte */
481
dest = talloc_array(ctx, char, 2*(n+1));
486
while (*src && n--) {
488
codepoint_t c = next_codepoint_convenience(iconv_convenience, src, &c_size);
493
c_size = push_codepoint(iconv_convenience, dest+size, c);
503
/* trim it so talloc_append_string() works */
504
dest = talloc_realloc(ctx, dest, char, size+1);
506
talloc_set_name_const(dest, dest);
512
Convert a string to UPPER case, allocated with talloc
514
_PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
516
return strupper_talloc_n(ctx, src, src?strlen(src):0);
520
talloc_strdup() a unix string to upper case.
522
_PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
524
return strupper_talloc(ctx, src);
528
Convert a string to lower case.
530
_PUBLIC_ void strlower_m(char *s)
533
struct smb_iconv_convenience *iconv_convenience;
535
/* this is quite a common operation, so we want it to be
536
fast. We optimise for the ascii case, knowing that all our
537
supported multi-byte character sets are ascii-compatible
538
(ie. they match for the first 128 chars) */
539
while (*s && !(((uint8_t)*s) & 0x80)) {
540
*s = tolower((uint8_t)*s);
547
iconv_convenience = get_iconv_convenience();
552
size_t c_size, c_size2;
553
codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
554
c_size2 = push_codepoint(iconv_convenience, d, tolower_m(c));
555
if (c_size2 > c_size) {
556
DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
557
c, tolower_m(c), (int)c_size, (int)c_size2));
558
smb_panic("codepoint expansion in strlower_m\n");
567
Convert a string to UPPER case.
569
_PUBLIC_ void strupper_m(char *s)
572
struct smb_iconv_convenience *iconv_convenience;
574
/* this is quite a common operation, so we want it to be
575
fast. We optimise for the ascii case, knowing that all our
576
supported multi-byte character sets are ascii-compatible
577
(ie. they match for the first 128 chars) */
578
while (*s && !(((uint8_t)*s) & 0x80)) {
579
*s = toupper((uint8_t)*s);
586
iconv_convenience = get_iconv_convenience();
591
size_t c_size, c_size2;
592
codepoint_t c = next_codepoint_convenience(iconv_convenience, s, &c_size);
593
c_size2 = push_codepoint(iconv_convenience, d, toupper_m(c));
594
if (c_size2 > c_size) {
595
DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
596
c, toupper_m(c), (int)c_size, (int)c_size2));
597
smb_panic("codepoint expansion in strupper_m\n");
607
Find the number of 'c' chars in a string
609
_PUBLIC_ size_t count_chars_m(const char *s, char c)
611
struct smb_iconv_convenience *ic = get_iconv_convenience();
616
codepoint_t c2 = next_codepoint_convenience(ic, s, &size);
617
if (c2 == c) count++;
626
* Copy a string from a char* unix src to a dos codepage string destination.
628
* @return the number of bytes occupied by the string in the destination.
630
* @param flags can include
632
* <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
633
* <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
636
* @param dest_len the maximum length in bytes allowed in the
637
* destination. If @p dest_len is -1 then no maximum is used.
639
static ssize_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
644
if (flags & STR_UPPER) {
645
char *tmpbuf = strupper_talloc(NULL, src);
646
if (tmpbuf == NULL) {
649
ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
654
src_len = strlen(src);
656
if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
659
return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, false);
663
* Copy a string from a unix char* src to an ASCII destination,
664
* allocating a buffer using talloc().
666
* @param dest always set at least to NULL
668
* @returns true if the conversion succeeded, false otherwise; the size in
669
* bytes of the converted string is stored in @p converted_size.
671
_PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
673
size_t src_len = strlen(src)+1;
675
return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size, false);
680
* Copy a string from a dos codepage source to a unix char* destination.
682
* The resulting string in "dest" is always null terminated.
684
* @param flags can have:
686
* <dt>STR_TERMINATE</dt>
687
* <dd>STR_TERMINATE means the string in @p src
688
* is null terminated, and src_len is ignored.</dd>
691
* @param src_len is the length of the source area in bytes.
692
* @returns the number of bytes occupied by the string in @p src.
694
static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
698
if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
699
if (src_len == (size_t)-1) {
700
src_len = strlen((const char *)src) + 1;
702
size_t len = strnlen((const char *)src, src_len);
709
ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, false);
712
dest[MIN(ret, dest_len-1)] = 0;
718
* Copy a string from a char* src to a unicode destination.
720
* @returns the number of bytes occupied by the string in the destination.
722
* @param flags can have:
725
* <dt>STR_TERMINATE <dd>means include the null termination.
726
* <dt>STR_UPPER <dd>means uppercase in the destination.
727
* <dt>STR_NOALIGN <dd>means don't do alignment.
730
* @param dest_len is the maximum length allowed in the
731
* destination. If dest_len is -1 then no maximum is used.
733
static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
736
size_t src_len = strlen(src);
739
if (flags & STR_UPPER) {
740
char *tmpbuf = strupper_talloc(NULL, src);
741
if (tmpbuf == NULL) {
744
ret = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
749
if (flags & STR_TERMINATE)
752
if (ucs2_align(NULL, dest, flags)) {
754
dest = (void *)((char *)dest + 1);
755
if (dest_len) dest_len--;
759
/* ucs2 is always a multiple of 2 bytes */
762
ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, false);
763
if (ret == (size_t)-1) {
774
* Copy a string from a unix char* src to a UCS2 destination,
775
* allocating a buffer using talloc().
777
* @param dest always set at least to NULL
779
* @returns true if the conversion succeeded, false otherwise; the size in
780
* bytes of the converted string is stored in @p converted_size.
782
_PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size)
784
size_t src_len = strlen(src)+1;
786
return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size, false);
791
* Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
793
* @param dest always set at least to NULL
795
* @returns true if the conversion succeeded, false otherwise; the size in bytes of the converted string is stored in @p converted_size.
798
_PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
800
size_t src_len = strlen(src)+1;
802
return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size, false);
806
Copy a string from a ucs2 source to a unix char* destination.
808
STR_TERMINATE means the string in src is null terminated.
809
STR_NOALIGN means don't try to align.
810
if STR_TERMINATE is set then src_len is ignored if it is -1.
811
src_len is the length of the source area in bytes
812
Return the number of bytes occupied by the string in src.
813
The resulting string in "dest" is always null terminated.
816
static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
820
if (ucs2_align(NULL, src, flags)) {
821
src = (const void *)((const char *)src + 1);
826
if (flags & STR_TERMINATE) {
827
if (src_len == (size_t)-1) {
828
src_len = utf16_len(src);
830
src_len = utf16_len_n(src, src_len);
834
/* ucs2 is always a multiple of 2 bytes */
835
if (src_len != (size_t)-1)
838
ret = convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, false);
840
dest[MIN(ret, dest_len-1)] = 0;
846
* Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc
848
* @param dest always set at least to NULL
850
* @returns true if the conversion succeeded, false otherwise; the size in bytes of the converted string is stored in @p converted_size.
853
_PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
855
size_t src_len = strlen(src)+1;
857
return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
861
* Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
863
* @param dest always set at least to NULL
865
* @returns true if the conversion succeeded, false otherwise; the size in bytes of the converted string is stored in @p converted_size.
868
_PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size)
870
size_t src_len = utf16_len(src);
872
return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
876
* Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
878
* @param dest always set at least to NULL
880
* @returns true if the conversion succeeded, false otherwise; the size in bytes of the converted string is stored in @p converted_size.
883
_PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size)
885
size_t src_len = strlen(src)+1;
887
return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size, false);
891
Copy a string from a char* src to a unicode or ascii
892
dos codepage destination choosing unicode or ascii based on the
893
flags in the SMB buffer starting at base_ptr.
894
Return the number of bytes occupied by the string in the destination.
896
STR_TERMINATE means include the null termination.
897
STR_UPPER means uppercase in the destination.
898
STR_ASCII use ascii even with unicode packet.
899
STR_NOALIGN means don't do alignment.
900
dest_len is the maximum length allowed in the destination. If dest_len
901
is -1 then no maximum is used.
904
_PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
906
if (flags & STR_ASCII) {
907
return push_ascii(dest, src, dest_len, flags);
908
} else if (flags & STR_UNICODE) {
909
return push_ucs2(dest, src, dest_len, flags);
911
smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
918
Copy a string from a unicode or ascii source (depending on
919
the packet flags) to a char* destination.
921
STR_TERMINATE means the string in src is null terminated.
922
STR_UNICODE means to force as unicode.
923
STR_ASCII use ascii even with unicode packet.
924
STR_NOALIGN means don't do alignment.
925
if STR_TERMINATE is set then src_len is ignored if it is -1
926
src_len is the length of the source area in bytes.
927
Return the number of bytes occupied by the string in src.
928
The resulting string in "dest" is always null terminated.
931
_PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
933
if (flags & STR_ASCII) {
934
return pull_ascii(dest, src, dest_len, src_len, flags);
935
} else if (flags & STR_UNICODE) {
936
return pull_ucs2(dest, src, dest_len, src_len, flags);
938
smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
945
* Convert string from one encoding to another, making error checking etc
947
* @param src pointer to source string (multibyte or singlebyte)
948
* @param srclen length of the source string in bytes
949
* @param dest pointer to destination string (multibyte or singlebyte)
950
* @param destlen maximal length allowed for string
951
* @returns the number of bytes occupied in the destination
953
_PUBLIC_ size_t convert_string(charset_t from, charset_t to,
954
void const *src, size_t srclen,
955
void *dest, size_t destlen,
956
bool allow_badcharcnv)
959
if (!convert_string_convenience(get_iconv_convenience(), from, to,
968
* Convert between character sets, allocating a new buffer using talloc for the result.
970
* @param srclen length of source buffer.
971
* @param dest always set at least to NULL
972
* @param converted_size Size in bytes of the converted string
973
* @note -1 is not accepted for srclen.
975
* @returns boolean indication whether the conversion succeeded
978
_PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
979
charset_t from, charset_t to,
980
void const *src, size_t srclen,
981
void *dest, size_t *converted_size,
982
bool allow_badcharcnv)
984
return convert_string_talloc_convenience(ctx, get_iconv_convenience(),
985
from, to, src, srclen, dest,
991
_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
993
return next_codepoint_convenience(get_iconv_convenience(), str, size);