1
/* ***** BEGIN LICENSE BLOCK *****
2
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
4
* The contents of this file are subject to the Mozilla Public License Version
5
* 1.1 (the "License"); you may not use this file except in compliance with
6
* the License. You may obtain a copy of the License at
7
* http://www.mozilla.org/MPL/
9
* Software distributed under the License is distributed on an "AS IS" basis,
10
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11
* for the specific language governing rights and limitations under the
14
* The Original Code is the Netscape security libraries.
16
* The Initial Developer of the Original Code is
17
* Netscape Communications Corporation.
18
* Portions created by the Initial Developer are Copyright (C) 1994-2000
19
* the Initial Developer. All Rights Reserved.
23
* Alternatively, the contents of this file may be used under the terms of
24
* either the GNU General Public License Version 2 or later (the "GPL"), or
25
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26
* in which case the provisions of the GPL or the LGPL are applicable instead
27
* of those above. If you wish to allow use of your version of this file only
28
* under the terms of either the GPL or the LGPL, and not to allow others to
29
* use your version of this file under the terms of the MPL, indicate your
30
* decision by deleting the provisions above and replace them with the notice
31
* and other provisions required by the GPL or the LGPL. If you do not delete
32
* the provisions above, a recipient may use your version of this file under
33
* the terms of any one of the MPL, the GPL or the LGPL.
35
* ***** END LICENSE BLOCK ***** */
38
static const char CVS_ID[] = "@(#) $RCSfile: utf8.c,v $ $Revision: 1.7 $ $Date: 2005/01/20 02:25:45 $";
44
* This file contains some additional utility routines required for
45
* handling UTF8 strings.
57
* There's an "is hex string" function in pki1/atav.c. If we need
58
* it in more places, pull that one out.
62
* nssUTF8_CaseIgnoreMatch
64
* Returns true if the two UTF8-encoded strings pointed to by the
65
* two specified NSSUTF8 pointers differ only in letter case.
67
* The error may be one of the following values:
68
* NSS_ERROR_INVALID_POINTER
71
* PR_TRUE if the strings match, ignoring case
72
* PR_FALSE if they don't
77
nssUTF8_CaseIgnoreMatch
85
if( ((const NSSUTF8 *)NULL == a) ||
86
((const NSSUTF8 *)NULL == b) ) {
87
nss_SetError(NSS_ERROR_INVALID_POINTER);
88
if( (PRStatus *)NULL != statusOpt ) {
89
*statusOpt = PR_FAILURE;
95
if( (PRStatus *)NULL != statusOpt ) {
96
*statusOpt = PR_SUCCESS;
102
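* XXX This is wrong for non-ASCII text: a byte-wise case-insensitive
* comparison of the UTF8 octets is not a Unicode case-insensitive match.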
104
if( 0 == PL_strcasecmp((const char *)a, (const char *)b) ) {
112
* nssUTF8_PrintableMatch
114
* If the two Printable strings pointed to by the
115
* two specified NSSUTF8 pointers match when compared with the
116
* rules for Printable String (leading and trailing spaces are
117
* disregarded, extents of whitespace match regardless of length,
118
* and case is not significant), then PR_TRUE will be returned.
119
* Otherwise, PR_FALSE will be returned. Upon failure, PR_FALSE
120
* will be returned. If the optional statusOpt argument is not
121
* NULL, then PR_SUCCESS or PR_FAILURE will be stored in that
124
* The error may be one of the following values:
125
* NSS_ERROR_INVALID_POINTER
128
* PR_TRUE if the strings match, ignoring case
129
* PR_FALSE if they don't
130
* PR_FALSE upon error
134
nssUTF8_PrintableMatch
145
if( ((const NSSUTF8 *)NULL == a) ||
146
((const NSSUTF8 *)NULL == b) ) {
147
nss_SetError(NSS_ERROR_INVALID_POINTER);
148
if( (PRStatus *)NULL != statusOpt ) {
149
*statusOpt = PR_FAILURE;
153
#endif /* NSSDEBUG */
155
if( (PRStatus *)NULL != statusOpt ) {
156
*statusOpt = PR_SUCCESS;
170
while( ('\0' != *c) && ('\0' != *d) ) {
176
if( ('a' <= e) && (e <= 'z') ) {
180
if( ('a' <= f) && (f <= 'z') ) {
215
/* And both '\0', btw */
225
* This routine duplicates the UTF8-encoded string pointed to by the
226
* specified NSSUTF8 pointer. If the optional arenaOpt argument is
227
* not null, the memory required will be obtained from that arena;
228
* otherwise, the memory required will be obtained from the heap.
229
* A pointer to the new string will be returned. In case of error,
230
* an error will be placed on the error stack and NULL will be
233
* The error may be one of the following values:
234
* NSS_ERROR_INVALID_POINTER
235
* NSS_ERROR_INVALID_ARENA
236
* NSS_ERROR_NO_MEMORY
239
NSS_IMPLEMENT NSSUTF8 *
250
if( (const NSSUTF8 *)NULL == s ) {
251
nss_SetError(NSS_ERROR_INVALID_POINTER);
252
return (NSSUTF8 *)NULL;
255
if( (NSSArena *)NULL != arenaOpt ) {
256
if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
257
return (NSSUTF8 *)NULL;
260
#endif /* NSSDEBUG */
262
len = PL_strlen((const char *)s);
264
if( '\0' != ((const char *)s)[ len ] ) {
265
/* must have wrapped, e.g., too big for PRUint32 */
266
nss_SetError(NSS_ERROR_NO_MEMORY);
267
return (NSSUTF8 *)NULL;
269
#endif /* PEDANTIC */
270
len++; /* zero termination */
272
rv = nss_ZAlloc(arenaOpt, len);
273
if( (void *)NULL == rv ) {
274
return (NSSUTF8 *)NULL;
277
(void)nsslibc_memcpy(rv, s, len);
284
* This routine returns the length in bytes (including the terminating
285
* null) of the UTF8-encoded string pointed to by the specified
286
* NSSUTF8 pointer. Zero is returned on error.
288
* The error may be one of the following values:
289
* NSS_ERROR_INVALID_POINTER
290
* NSS_ERROR_VALUE_TOO_LARGE
294
* nonzero length of the string.
297
NSS_IMPLEMENT PRUint32
307
if( (const NSSUTF8 *)NULL == s ) {
308
nss_SetError(NSS_ERROR_INVALID_POINTER);
309
if( (PRStatus *)NULL != statusOpt ) {
310
*statusOpt = PR_FAILURE;
314
#endif /* NSSDEBUG */
316
sv = PL_strlen((const char *)s) + 1;
318
if( '\0' != ((const char *)s)[ sv-1 ] ) {
320
nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
321
if( (PRStatus *)NULL != statusOpt ) {
322
*statusOpt = PR_FAILURE;
326
#endif /* PEDANTIC */
328
if( (PRStatus *)NULL != statusOpt ) {
329
*statusOpt = PR_SUCCESS;
338
* This routine returns the length in characters (not including the
339
* terminating null) of the UTF8-encoded string pointed to by the
340
* specified NSSUTF8 pointer.
342
* The error may be one of the following values:
343
* NSS_ERROR_INVALID_POINTER
344
* NSS_ERROR_VALUE_TOO_LARGE
345
* NSS_ERROR_INVALID_STRING
348
* length of the string (which may be zero)
352
NSS_IMPLEMENT PRUint32
360
const PRUint8 *c = (const PRUint8 *)s;
363
if( (const NSSUTF8 *)NULL == s ) {
364
nss_SetError(NSS_ERROR_INVALID_POINTER);
367
#endif /* NSSDEBUG */
372
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
373
* 0000 0000-0000 007F 0xxxxxxx
374
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
375
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
376
* 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
377
* 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
378
* 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
383
if( (*c & 0x80) == 0 ) {
385
} else if( (*c & 0xE0) == 0xC0 ) {
387
} else if( (*c & 0xF0) == 0xE0 ) {
389
} else if( (*c & 0xF8) == 0xF0 ) {
391
} else if( (*c & 0xFC) == 0xF8 ) {
393
} else if( (*c & 0xFE) == 0xFC ) {
396
nss_SetError(NSS_ERROR_INVALID_STRING);
404
/* Wrapped-- too big */
405
nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
411
for( d = &c[1]; d < &c[incr]; d++ ) {
412
/*
 * Every octet after the lead octet must be a continuation octet of the
 * form 10xxxxxx, i.e. (octet & 0xC0) == 0x80.  The previous comparison
 * against 0xF0 could never succeed after masking with 0xC0, so every
 * multi-octet sequence was reported as NSS_ERROR_INVALID_STRING.
 */
if( (*d & 0xC0) != 0x80 ) {
413
nss_SetError(NSS_ERROR_INVALID_STRING);
418
#endif /* PEDANTIC */
423
if( (PRStatus *)NULL != statusOpt ) {
424
*statusOpt = PR_SUCCESS;
430
if( (PRStatus *)NULL != statusOpt ) {
431
*statusOpt = PR_FAILURE;
441
* This routine creates a UTF8 string from a string in some other
442
* format. Some types of string may include embedded null characters,
443
* so for them the length parameter must be used. For string types
444
* that are null-terminated, the length parameter is optional; if it
445
* is zero, it will be ignored. If the optional arena argument is
446
* non-null, the memory used for the new string will be obtained from
447
* that arena, otherwise it will be obtained from the heap. This
448
* routine may return NULL upon error, in which case it will have
449
* placed an error on the error stack.
451
* The error may be one of the following:
452
* NSS_ERROR_INVALID_POINTER
453
* NSS_ERROR_NO_MEMORY
454
* NSS_ERROR_UNSUPPORTED_TYPE
458
* A non-null pointer to a new UTF8 string otherwise
461
extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */
463
NSS_IMPLEMENT NSSUTF8 *
468
const void *inputString,
469
PRUint32 size /* in bytes, not characters */
475
if( (NSSArena *)NULL != arenaOpt ) {
476
if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
477
return (NSSUTF8 *)NULL;
481
if( (const void *)NULL == inputString ) {
482
nss_SetError(NSS_ERROR_INVALID_POINTER);
483
return (NSSUTF8 *)NULL;
485
#endif /* NSSDEBUG */
488
case nssStringType_DirectoryString:
489
/* This is a composite type requiring BER */
490
nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
492
case nssStringType_TeletexString:
494
* draft-ietf-pkix-ipki-part1-11 says in part:
496
* In addition, many legacy implementations support names encoded
497
* in the ISO 8859-1 character set (Latin1String) but tag them as
498
* TeletexString. The Latin1String includes characters used in
499
* Western European countries which are not part of the
500
* TeletexString charcter set. Implementations that process
501
* TeletexString SHOULD be prepared to handle the entire ISO
502
* 8859-1 character set.[ISO 8859-1].
504
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
506
case nssStringType_PrintableString:
508
* PrintableString consists of A-Z, a-z, 0-9, space, and '()+,-./:=?
509
* This is a subset of ASCII, which is a subset of UTF8.
510
* So we can just duplicate the string over.
514
rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
516
rv = nss_ZAlloc(arenaOpt, size+1);
517
if( (NSSUTF8 *)NULL == rv ) {
518
return (NSSUTF8 *)NULL;
521
(void)nsslibc_memcpy(rv, inputString, size);
525
case nssStringType_UniversalString:
527
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
529
case nssStringType_BMPString:
530
/* Base Multilingual Plane of Unicode */
531
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
533
case nssStringType_UTF8String:
535
rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
537
rv = nss_ZAlloc(arenaOpt, size+1);
538
if( (NSSUTF8 *)NULL == rv ) {
539
return (NSSUTF8 *)NULL;
542
(void)nsslibc_memcpy(rv, inputString, size);
546
case nssStringType_PHGString:
548
* PHGString is an IA5String (with case-insensitive comparisons).
549
* IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has
552
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
554
case nssStringType_GeneralString:
555
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
558
nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
565
NSS_IMPLEMENT NSSItem *
574
NSSItem *rv = (NSSItem *)NULL;
575
PRStatus status = PR_SUCCESS;
578
if( (NSSArena *)NULL != arenaOpt ) {
579
if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
580
return (NSSItem *)NULL;
584
if( (NSSUTF8 *)NULL == string ) {
585
nss_SetError(NSS_ERROR_INVALID_POINTER);
586
return (NSSItem *)NULL;
588
#endif /* NSSDEBUG */
591
case nssStringType_DirectoryString:
592
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
594
case nssStringType_TeletexString:
595
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
597
case nssStringType_PrintableString:
598
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
600
case nssStringType_UniversalString:
601
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
603
case nssStringType_BMPString:
604
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
606
case nssStringType_UTF8String:
608
NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt);
609
if( (NSSUTF8 *)NULL == dup ) {
610
return (NSSItem *)NULL;
613
if( (NSSItem *)NULL == rvOpt ) {
614
rv = nss_ZNEW(arenaOpt, NSSItem);
615
if( (NSSItem *)NULL == rv ) {
616
(void)nss_ZFreeIf(dup);
617
return (NSSItem *)NULL;
624
dup = (NSSUTF8 *)NULL;
625
rv->size = nssUTF8_Size(rv->data, &status);
626
if( (0 == rv->size) && (PR_SUCCESS != status) ) {
627
if( (NSSItem *)NULL == rvOpt ) {
628
(void)nss_ZFreeIf(rv);
630
return (NSSItem *)NULL;
634
case nssStringType_PHGString:
635
nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
638
nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
646
* nssUTF8_CopyIntoFixedBuffer
648
* This will copy a UTF8 string into a fixed-length buffer, making
649
* sure that all of the characters are valid. Any remaining space will
650
* be padded with the specified ASCII character, typically either
656
NSS_IMPLEMENT PRStatus
657
nssUTF8_CopyIntoFixedBuffer
665
PRUint32 stringSize = 0;
668
if( (char *)NULL == buffer ) {
669
nss_SetError(NSS_ERROR_INVALID_POINTER);
673
if( 0 == bufferSize ) {
674
nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
678
if( (pad & 0x80) != 0x00 ) {
679
nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
682
#endif /* NSSDEBUG */
684
if( (NSSUTF8 *)NULL == string ) {
685
string = (NSSUTF8 *) "";
688
stringSize = nssUTF8_Size(string, (PRStatus *)NULL);
689
stringSize--; /* don't count the trailing null */
690
if( stringSize > bufferSize ) {
691
PRUint32 bs = bufferSize;
692
(void)nsslibc_memcpy(buffer, string, bufferSize);
694
if( ( ((buffer[ bs-1 ] & 0x80) == 0x00)) ||
695
((bs > 1) && ((buffer[ bs-2 ] & 0xE0) == 0xC0)) ||
696
((bs > 2) && ((buffer[ bs-3 ] & 0xF0) == 0xE0)) ||
697
((bs > 3) && ((buffer[ bs-4 ] & 0xF8) == 0xF0)) ||
698
((bs > 4) && ((buffer[ bs-5 ] & 0xFC) == 0xF8)) ||
699
((bs > 5) && ((buffer[ bs-6 ] & 0xFE) == 0xFC)) ) {
704
/* Too long. We have to trim the last character */
705
for( /*bs*/; bs != 0; bs-- ) {
706
if( (buffer[bs-1] & 0xC0) != 0x80 ) {
714
(void)nsslibc_memset(buffer, pad, bufferSize);
715
(void)nsslibc_memcpy(buffer, string, stringSize);
737
if( ((const NSSUTF8 *)NULL == a) ||
738
((const NSSUTF8 *)NULL == b) ) {
739
nss_SetError(NSS_ERROR_INVALID_POINTER);
740
if( (PRStatus *)NULL != statusOpt ) {
741
*statusOpt = PR_FAILURE;
745
#endif /* NSSDEBUG */
747
la = nssUTF8_Size(a, statusOpt);
752
lb = nssUTF8_Size(b, statusOpt);
761
return nsslibc_memequal(a, b, la, statusOpt);