2
* Copyright (c) 2005 Martin Decky
3
* Copyright (c) 2008 Jiri Svoboda
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
10
* - Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* - Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
* - The name of the author may not be used to endorse or promote products
16
* derived from this software without specific prior written permission.
18
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47
/** Byte mask consisting of lowest @n bits (out of 8) */
48
/* Shift is done on an unsigned operand; intended for @n in the range 0..8. */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1U))
50
/** Mask consisting of lowest @n bits (out of 32) */
51
/*
 * The shift is performed on a uint32_t operand: shifting a signed int 1
 * into the sign bit (n == 31) and then subtracting 1 is undefined behavior.
 * Valid for @n in the range 0..31.
 */
#define LO_MASK_32(n)  ((uint32_t) ((((uint32_t) 1) << (n)) - 1))
53
/** Byte mask consisting of highest @n bits (out of 8) */
54
/*
 * Built by shifting 0xff and casting the result to uint8_t. Applying ~ to a
 * uint8_t low mask would promote it to int and set every bit above bit 7,
 * producing a negative int instead of an 8-bit mask.
 * Valid for @n in the range 0..8.
 */
#define HI_MASK_8(n)  ((uint8_t) (0xffU << (8 - (n))))
56
/** Number of data bits in a UTF-8 continuation byte */
59
/** Decode a single character from a string.
61
* Decode a single character from a string of size @a size. Decoding starts
62
* at @a offset and this offset is moved to the beginning of the next
63
* character. In case of decoding error, offset generally advances at least
64
* by one. However, offset is never moved beyond size.
66
* @param str String (not necessarily NULL-terminated).
67
* @param offset Byte offset in string where to start decoding.
68
* @param size Size of the string (in bytes).
70
* @return Value of decoded character, U_SPECIAL on decoding error or
71
* 0 if attempting to decode beyond @a size.
74
wchar_t str_decode(const char *str, size_t *offset, size_t size)
76
if (*offset + 1 > size)
79
/* First byte read from string */
80
uint8_t b0 = (uint8_t) str[(*offset)++];
82
/* Determine code length */
84
unsigned int b0_bits; /* Data bits in first byte */
85
unsigned int cbytes; /* Number of continuation bytes */
87
if ((b0 & 0x80) == 0) {
88
/* 0xxxxxxx (Plain ASCII) */
91
} else if ((b0 & 0xe0) == 0xc0) {
92
/* 110xxxxx 10xxxxxx */
95
} else if ((b0 & 0xf0) == 0xe0) {
96
/* 1110xxxx 10xxxxxx 10xxxxxx */
99
} else if ((b0 & 0xf8) == 0xf0) {
100
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
104
/* 10xxxxxx -- unexpected continuation byte */
108
if (*offset + cbytes > size)
111
wchar_t ch = b0 & LO_MASK_8(b0_bits);
113
/* Decode continuation bytes */
115
uint8_t b = (uint8_t) str[(*offset)++];
117
/* Must be 10xxxxxx */
118
if ((b & 0xc0) != 0x80)
121
/* Shift data bits to ch */
122
ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
129
/** Encode a single character to string representation.
131
* Encode a single character to string representation (i.e. UTF-8) and store
132
* it into a buffer at @a offset. Encoding starts at @a offset and this offset
133
* is moved to the position where the next character can be written to.
135
* @param ch Input character.
136
* @param str Output buffer.
137
* @param offset Byte offset where to start writing.
138
* @param size Size of the output buffer (in bytes).
140
* @return EOK if the character was encoded successfully, EOVERFLOW if there
141
* was not enough space in the output buffer or EINVAL if the character
144
int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
152
/* Unsigned version of ch (bit operations should only be done
153
on unsigned types). */
154
uint32_t cc = (uint32_t) ch;
156
/* Determine how many continuation bytes are needed */
158
unsigned int b0_bits; /* Data bits in first byte */
159
unsigned int cbytes; /* Number of continuation bytes */
161
if ((cc & ~LO_MASK_32(7)) == 0) {
164
} else if ((cc & ~LO_MASK_32(11)) == 0) {
167
} else if ((cc & ~LO_MASK_32(16)) == 0) {
170
} else if ((cc & ~LO_MASK_32(21)) == 0) {
174
/* Codes longer than 21 bits are not supported */
178
/* Check for available space in buffer */
179
if (*offset + cbytes >= size)
182
/* Encode continuation bytes */
184
for (i = cbytes; i > 0; i--) {
185
str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
186
cc = cc >> CONT_BITS;
189
/* Encode first byte */
190
str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
193
*offset += cbytes + 1;
198
/** Get size of string.
200
* Get the number of bytes which are used by the string @a str (excluding the
203
* @param str String to consider.
205
* @return Number of bytes used by the string
208
size_t str_size(const char *str)
218
/** Get size of wide string.
220
* Get the number of bytes which are used by the wide string @a str (excluding the
223
* @param str Wide string to consider.
225
* @return Number of bytes used by the wide string
228
size_t wstr_size(const wchar_t *str)
230
return (wstr_length(str) * sizeof(wchar_t));
233
/** Get size of string with length limit.
235
* Get the number of bytes which are used by up to @a max_len first
236
* characters in the string @a str. If @a max_len is greater than
237
* the length of @a str, the entire string is measured (excluding the
240
* @param str String to consider.
241
* @param max_len Maximum number of characters to measure.
243
* @return Number of bytes used by the characters.
246
size_t str_lsize(const char *str, size_t max_len)
251
while (len < max_len) {
252
if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
261
/** Get size of wide string with length limit.
263
* Get the number of bytes which are used by up to @a max_len first
264
* wide characters in the wide string @a str. If @a max_len is greater than
265
* the length of @a str, the entire wide string is measured (excluding the
268
* @param str Wide string to consider.
269
* @param max_len Maximum number of wide characters to measure.
271
* @return Number of bytes used by the wide characters.
274
size_t wstr_lsize(const wchar_t *str, size_t max_len)
276
return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
279
/** Get number of characters in a string.
281
* @param str NULL-terminated string.
283
* @return Number of characters in string.
286
size_t str_length(const char *str)
291
while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
297
/** Get number of characters in a wide string.
299
* @param str NULL-terminated wide string.
301
* @return Number of characters in @a str.
304
size_t wstr_length(const wchar_t *wstr)
314
/** Get number of characters in a string with size limit.
316
* @param str NULL-terminated string.
317
* @param size Maximum number of bytes to consider.
319
* @return Number of characters in string.
322
size_t str_nlength(const char *str, size_t size)
327
while (str_decode(str, &offset, size) != 0)
333
/** Get number of characters in a wide string with size limit.
335
* @param str NULL-terminated wide string.
336
* @param size Maximum number of bytes to consider.
338
* @return Number of characters in string.
341
size_t wstr_nlength(const wchar_t *str, size_t size)
344
size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
347
while ((offset < limit) && (*str++ != 0)) {
349
offset += sizeof(wchar_t);
355
/** Check whether character is plain ASCII.
357
* @return True if character is plain ASCII.
360
bool ascii_check(wchar_t ch)
362
if ((ch >= 0) && (ch <= 127))
368
/** Check whether character is valid
370
* @return True if character is a valid Unicode code point.
373
bool chr_check(wchar_t ch)
375
if ((ch >= 0) && (ch <= 1114111))
381
/** Compare two NULL terminated strings.
383
* Do a char-by-char comparison of two NULL-terminated strings.
384
* The strings are considered equal iff they consist of the same
385
* characters on the minimum of their lengths.
387
* @param s1 First string to compare.
388
* @param s2 Second string to compare.
390
* @return 0 if the strings are equal, -1 if first is smaller,
391
* 1 if second smaller.
394
int str_cmp(const char *s1, const char *s2)
403
c1 = str_decode(s1, &off1, STR_NO_LIMIT);
404
c2 = str_decode(s2, &off2, STR_NO_LIMIT);
412
if (c1 == 0 || c2 == 0)
419
/** Compare two NULL terminated strings with length limit.
421
* Do a char-by-char comparison of two NULL-terminated strings.
422
* The strings are considered equal iff they consist of the same
423
* characters on the minimum of their lengths and the length limit.
425
* @param s1 First string to compare.
426
* @param s2 Second string to compare.
427
* @param max_len Maximum number of characters to consider.
429
* @return 0 if the strings are equal, -1 if first is smaller,
430
* 1 if second smaller.
433
int str_lcmp(const char *s1, const char *s2, size_t max_len)
447
c1 = str_decode(s1, &off1, STR_NO_LIMIT);
448
c2 = str_decode(s2, &off2, STR_NO_LIMIT);
456
if (c1 == 0 || c2 == 0)
468
* Copy source string @a src to destination buffer @a dest.
469
* No more than @a size bytes are written. If the size of the output buffer
470
* is at least one byte, the output string will always be well-formed, i.e.
471
* null-terminated and containing only complete characters.
473
* @param dest Destination buffer.
474
* @param size Size of the destination buffer (must be > 0).
475
* @param src Source string.
477
void str_cpy(char *dest, size_t size, const char *src)
483
/* There must be space for a null terminator in the buffer. */
489
while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
490
if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
494
dest[dest_off] = '\0';
497
/** Copy size-limited substring.
499
* Copy prefix of string @a src of max. size @a size to destination buffer
500
* @a dest. No more than @a size bytes are written. The output string will
501
* always be well-formed, i.e. null-terminated and containing only complete
504
* No more than @a n bytes are read from the input string, so it does not
505
* have to be null-terminated.
507
* @param dest Destination buffer.
508
* @param size Size of the destination buffer (must be > 0).
509
* @param src Source string.
510
* @param n Maximum number of bytes to read from @a src.
512
void str_ncpy(char *dest, size_t size, const char *src, size_t n)
518
/* There must be space for a null terminator in the buffer. */
524
while ((ch = str_decode(src, &src_off, n)) != 0) {
525
if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
529
dest[dest_off] = '\0';
532
/** Append one string to another.
534
* Append source string @a src to string in destination buffer @a dest.
535
* Size of the destination buffer is @a size. If the size of the output buffer
536
* is at least one byte, the output string will always be well-formed, i.e.
537
* null-terminated and containing only complete characters.
539
* @param dest Destination buffer.
540
* @param size Size of the destination buffer.
541
* @param src Source string.
543
void str_append(char *dest, size_t size, const char *src)
547
dstr_size = str_size(dest);
548
str_cpy(dest + dstr_size, size - dstr_size, src);
551
/** Copy NULL-terminated wide string to string
553
* Copy source wide string @a src to destination buffer @a dst.
554
* No more than @a size bytes are written. NULL-terminator is always
555
* written after the last successfully copied character (i.e. if the
556
* destination buffer has at least 1 byte, it will always be
559
* @param src Source wide string.
560
* @param dst Destination buffer.
561
* @param size Size of the destination buffer.
564
void wstr_nstr(char *dst, const wchar_t *src, size_t size)
566
/* No space for the NULL-terminator in the buffer */
574
while ((ch = src[src_idx++]) != 0) {
575
if (chr_encode(ch, dst, &dst_off, size) != EOK)
585
/** Find first occurrence of character in string.
587
* @param str String to search.
588
* @param ch Character to look for.
590
* @return Pointer to character in @a str or NULL if not found.
592
char *str_chr(const char *str, wchar_t ch)
598
while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
600
return (char *) (str + last);
607
/** Find last occurrence of character in string.
609
* @param str String to search.
610
* @param ch Character to look for.
612
* @return Pointer to character in @a str or NULL if not found.
614
char *str_rchr(const char *str, wchar_t ch)
619
const char *res = NULL;
621
while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
630
/** Insert a wide character into a wide string.
632
* Insert a wide character into a wide string at position
633
* @a pos. The characters after the position are shifted.
635
* @param str String to insert to.
636
* @param ch Character to insert.
637
* @param pos Character index where to insert.
638
* @param max_pos Characters in the buffer.
640
* @return True if the insertion was successful, false if the position
644
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
646
size_t len = wstr_length(str);
648
if ((pos > len) || (pos + 1 > max_pos))
652
for (i = len; i + 1 > pos; i--)
660
/** Remove a wide character from a wide string.
662
* Remove a wide character from a wide string at position
663
* @a pos. The characters after the position are shifted.
665
* @param str String to remove from.
666
* @param pos Character index to remove.
668
* @return True if the removal was successful, false if the position
672
bool wstr_remove(wchar_t *str, size_t pos)
674
size_t len = wstr_length(str);
680
for (i = pos + 1; i <= len; i++)
686
int stricmp(const char *a, const char *b)
690
while (a[c] && b[c] && (!(tolower(a[c]) - tolower(b[c]))))
693
return (tolower(a[c]) - tolower(b[c]));
696
/** Convert string to a number.
697
* Core of strtol and strtoul functions.
699
* @param nptr Pointer to string.
700
* @param endptr If not NULL, function stores here pointer to the first
702
* @param base Zero or number between 2 and 36 inclusive.
703
* @param sgn It's set to 1 if minus found.
704
* @return Result of conversion.
707
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
710
unsigned long result = 0;
712
const char *str = nptr;
715
while (isspace(*str))
721
} else if (*str == '+')
725
if ((base == 1) || (base > 36)) {
726
/* FIXME: set errno to EINVAL */
729
if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
738
if ((str[1] == 'X') || (str[1] == 'x')) {
749
c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
750
(c <= '9' ? c - '0' : 0xff)));
755
a = (result & 0xff) * base + c;
756
b = (result >> 8) * base + (a >> 8);
758
if (b > (ULONG_MAX >> 8)) {
760
/* FIXME: errno = ERANGE*/
764
result = (b << 8) + (a & 0xff);
770
* No number was found => first invalid character is the first
771
* character of the string.
773
/* FIXME: set errno to EINVAL */
779
*endptr = (char *) str;
782
/*FIXME: errno = EINVAL*/
789
/** Convert initial part of string to long int according to given base.
790
* The number may begin with an arbitrary number of whitespaces followed by
791
* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
792
* inserted and the number will be taken as hexadecimal one. If the base is 0
793
* and the number begins with a zero, the number will be taken as octal (as with
794
* base 8). Otherwise the base 0 is taken as decimal.
796
* @param nptr Pointer to string.
797
* @param endptr If not NULL, function stores here pointer to the first
799
* @param base Zero or number between 2 and 36 inclusive.
800
* @return Result of conversion.
802
long int strtol(const char *nptr, char **endptr, int base)
805
unsigned long number = 0;
807
number = _strtoul(nptr, endptr, base, &sgn);
809
if (number > LONG_MAX) {
810
if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
811
/* FIXME: set 0 to errno */
814
/* FIXME: set ERANGE to errno */
815
return (sgn ? LONG_MIN : LONG_MAX);
818
return (sgn ? -number : number);
822
/** Convert initial part of string to unsigned long according to given base.
823
* The number may begin with an arbitrary number of whitespaces followed by
824
* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
825
* inserted and the number will be taken as hexadecimal one. If the base is 0
826
* and the number begins with a zero, the number will be taken as octal (as with
827
* base 8). Otherwise the base 0 is taken as decimal.
829
* @param nptr Pointer to string.
830
* @param endptr If not NULL, function stores here pointer to the first
832
* @param base Zero or number between 2 and 36 inclusive.
833
* @return Result of conversion.
835
unsigned long strtoul(const char *nptr, char **endptr, int base)
838
unsigned long number = 0;
840
number = _strtoul(nptr, endptr, base, &sgn);
842
return (sgn ? -number : number);
845
char *str_dup(const char *src)
847
size_t size = str_size(src);
848
void *dest = malloc(size + 1);
851
return (char *) NULL;
853
return (char *) memcpy(dest, src, size + 1);
856
char *strtok(char *s, const char *delim)
860
return strtok_r(s, delim, &next);
863
char *strtok_r(char *s, const char *delim, char **next)
870
/* Skip over leading delimiters. */
871
while (*s && (str_chr(delim, *s) != NULL)) ++s;
874
/* Skip over token characters. */
875
while (*s && (str_chr(delim, *s) == NULL)) ++s;
877
*next = (*s ? s + 1 : s);
880
return NULL; /* No more tokens. */
883
/* Overwrite delimiter with NULL terminator. */