2
* The contents of this file are subject to the Mozilla Public
3
* License Version 1.1 (the "MPL"); you may not use this file
4
* except in compliance with the MPL. You may obtain a copy of
5
* the MPL at http://www.mozilla.org/MPL/
7
* Software distributed under the MPL is distributed on an "AS
8
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
9
* implied. See the MPL for the specific language governing
10
* rights and limitations under the MPL.
12
* The Original Code is lineterm.
14
* The Initial Developer of the Original Code is Ramalingam Saravanan.
15
* Portions created by Ramalingam Saravanan <svn@xmlterm.org> are
16
* Copyright (C) 1999 Ramalingam Saravanan. All Rights Reserved.
20
* Alternatively, the contents of this file may be used under the
21
* terms of the GNU General Public License (the "GPL"), in which case
22
* the provisions of the GPL are applicable instead of
23
* those above. If you wish to allow use of your version of this
24
* file only under the terms of the GPL and not to allow
25
* others to use your version of this file under the MPL, indicate
26
* your decision by deleting the provisions above and replace them
27
* with the notice and other provisions required by the GPL.
28
* If you do not delete the provisions above, a recipient
29
* may use your version of this file under either the MPL or the
33
/* unistring.c: Unicode string operations implementation */
35
/* public declarations */
36
#include "unistring.h"
38
/* private declarations */
40
/** Encodes Unicode string US with NUS characters into UTF8 string S with
41
* up to NS characters, returning the number of REMAINING Unicode characters
42
* and the number of ENCODED Utf8 characters
44
void ucstoutf8(const UNICHAR* us, int nus, char* s, int ns,
45
int* remaining, int* encoded)
51
while ((j < ns) && (k < nus)) {
52
UNICHAR uch = us[k++];
57
} else if (uch < 0x0800) {
59
s[j++] = ((uch & 0x07C0) >> 6) | 0xC0;
60
s[j++] = (uch & 0x003F) | 0x80;
64
s[j++] = ((uch & 0xF000) >> 12) | 0xE0;
65
s[j++] = ((uch & 0x0FC0) >> 6) | 0x80;
66
s[j++] = (uch & 0x003F) | 0x80;
78
/** Decodes UTF8 string S with NS characters to Unicode string US with
79
* up to NUS characters, returning the number of REMAINING Utf8 characters
80
* and the number of DECODED Unicode characters.
81
* If skipNUL is non-zero, NUL input characters are skipped.
82
* Returns 0 if successful,
83
* -1 if an error occurred during decoding
85
int utf8toucs(const char* s, int ns, UNICHAR* us, int nus,
86
int skipNUL, int* remaining, int* decoded)
93
while ((j < ns) && (k < nus)) {
100
if (j >= ns-2) break;
102
if ( (s[j+1] & 0x40) || !(s[j+1] & 0x80) ||
103
(s[j+2] & 0x40) || !(s[j+2] & 0x80) ) {
107
us[k++] = ((ch & 0x0F) << 12)
108
| ((s[j+1] & 0x3F) << 6)
115
if (j >= ns-1) break;
117
if ( (s[j+1] & 0x40) || !(s[j+1] & 0x80) ) {
121
us[k++] = ((ch & 0x1F) << 6)
127
/* consume 1 (error) */
134
if (ch || !skipNUL) {
151
/** Prints Unicode string US with NUS characters to file stream STREAM,
152
* escaping non-printable ASCII characters and all non-ASCII characters
154
void ucsprint(FILE* stream, const UNICHAR* us, int nus)
156
static const char hexDigits[17] = "0123456789abcdef";
160
for (k=0; k<nus; k++) {
163
if (uch < (UNICHAR)U_SPACE) {
164
/* ASCII control character */
165
fprintf(stream, "^%c", (char) uch+U_ATSIGN);
167
} else if (uch == (UNICHAR)U_CARET) {
169
fprintf(stream, "^^");
171
} else if (uch < (UNICHAR)U_DEL) {
172
/* Printable ASCII character */
173
fprintf(stream, "%c", (char) uch);
176
/* DEL or non-ASCII character */
177
char esc_str[8]="&#0000;";
179
for (j=5; j>1; j--) {
180
esc_str[j] = hexDigits[uch%16];
183
fprintf(stream, "%s", esc_str);
189
/** Copy exactly n characters from plain character source string to UNICHAR
190
* destination string, ignoring source characters past a null character and
191
* padding the destination with null characters if necessary.
193
UNICHAR* ucscopy(register UNICHAR* dest, register const char* srcplain,
197
register const UNICHAR* destmx = dest + n;
199
/* Copy characters from source to destination, stopping at NUL */
200
while (dest < destmx) {
201
*dest++ = (ch = *srcplain++);
206
/* Pad with NULs, if necessary */
207
while (dest < destmx)
215
/** Locates first occurrence of character within string and returns pointer
216
* to it if found, else returning null pointer. (character may be NUL)
218
UNICHAR* ucschr(register const UNICHAR* str, register const UNICHAR chr)
222
return (UNICHAR*) str;
223
} while (*str++ != U_NUL);
229
/** Locates last occurrence of character within string and returns pointer
230
* to it if found, else returning null pointer. (character may be NUL)
232
UNICHAR* ucsrchr(register const UNICHAR* str, register const UNICHAR chr)
234
const UNICHAR* retstr = NULL;
238
} while (*str++ != U_NUL);
240
return (UNICHAR*) retstr;
244
/** Compare all characters between string1 and string2, returning
245
* a zero value if all characters are equal, or returning
246
* character1 - character2 for the first character that is different
247
* between the two strings.
248
* (Characters following a null character are not compared.)
250
int ucscmp(register const UNICHAR* str1, register const UNICHAR* str2)
252
register UNICHAR ch1, ch2;
255
if ((ch1 = *str1++) != (ch2 = *str2++))
258
} while (ch1 != U_NUL);
264
/** Compare up to n characters between string1 and string2, returning
265
* a zero value if all compared characters are equal, or returning
266
* character1 - character2 for the first character that is different
267
* between the two strings.
268
* (Characters following a null character are not compared.)
270
int ucsncmp(register const UNICHAR* str1, register const UNICHAR* str2,
273
register UNICHAR ch1, ch2;
274
register const UNICHAR* str1mx = str1 + n;
276
while (str1 < str1mx) {
277
if ((ch1 = *str1++) != (ch2 = *str2++))
288
/** Copy exactly n characters from source to destination, ignoring source
289
* characters past a null character and padding the destination with null
290
* characters if necessary.
292
UNICHAR* ucsncpy(register UNICHAR* dest, register const UNICHAR* src,
296
register const UNICHAR* destmx = dest + n;
298
/* Copy characters from source to destination, stopping at NUL */
299
while (dest < destmx) {
300
*dest++ = (ch = *src++);
305
/* Pad with NULs, if necessary */
306
while (dest < destmx)
313
/** Returns string length: the number of characters before the first NUL
315
size_t ucslen(const UNICHAR* str)
317
register const UNICHAR* strcp = str;
319
while (*strcp++ != U_NUL);
321
return strcp - str - 1;
325
/** Locates substring within string and returns pointer to it if found,
326
* else returning null pointer. If substring has zero length, then full
327
* string is returned.
329
UNICHAR* ucsstr(register const UNICHAR* str, const UNICHAR* substr)
331
register UNICHAR subch1, ch;
333
/* If null substring, return string */
334
if (*substr == U_NUL)
335
return (UNICHAR*) str;
337
/* First character of non-null substring */
340
if ((ch = *str) == U_NUL)
346
/* First character matches; check if rest of substring matches */
347
register const UNICHAR* strcp = str;
348
register const UNICHAR* substrcp = substr;
352
if (*substrcp == U_NUL)
353
return (UNICHAR*) str;
354
} while (*substrcp == *strcp);
357
} while ((ch = *(++str)) != U_NUL);
363
/** Returns length of longest initial segment of string that contains
364
* only the specified characters.
366
size_t ucsspn(const UNICHAR* str, const UNICHAR* chars)
368
register UNICHAR strch, ch;
369
register const UNICHAR* charscp;
370
register const UNICHAR* strcp = str;
372
while ((strch = *strcp++) != U_NUL) {
375
/* Check that it is one of the specified characters */
376
while ((ch = *charscp++) != U_NUL) {
381
return (size_t) (strcp - str - 1);
384
return (size_t) (strcp - str - 1);
388
/** Returns length of longest initial segment of string that does not
389
* contain any of the specified characters.
391
size_t ucscspn(const UNICHAR* str, const UNICHAR* chars)
393
register UNICHAR strch, ch;
394
register const UNICHAR* charscp;
395
register const UNICHAR* strcp = str;
397
while ((strch = *strcp++) != U_NUL) {
400
/* Check that it is not one of the specified characters */
401
while ((ch = *charscp++) != U_NUL) {
403
return (size_t) (strcp - str - 1);
407
return (size_t) (strcp - str - 1);
409
#endif /* !USE_WCHAR */