2
******************************************************************************
4
* Copyright (C) 1997-2001, International Business Machines
5
* Corporation and others. All Rights Reserved.
7
******************************************************************************
9
* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
11
* Date Name Description
12
* 04/14/97 aliu Creation.
13
* 04/24/97 aliu Added getDefaultDataDirectory() and
14
* getDefaultLocaleID().
15
* 04/28/97 aliu Rewritten to assume Unix and apply general methods
16
* for assumed case. Non-UNIX platforms must be
17
* special-cased. Rewrote numeric methods dealing
18
* with NaN and Infinity to be platform independent
19
* over all IEEE 754 platforms.
20
* 05/13/97 aliu Restored sign of timezone
21
* (semantics are hours West of GMT)
22
* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
24
* 07/22/98 stephen Added remainder, max, min, trunc
25
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26
* 08/24/98 stephen Added longBitsFromDouble
27
* 09/08/98 stephen Minor changes for Mac Port
28
* 03/02/99 stephen Removed openFile(). Added AS400 support.
30
* 04/15/99 stephen Converted to C.
31
* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32
* 08/04/99 jeffrey R. Added OS/2 changes
33
* 11/15/99 helena Integrated S/390 IEEE support.
34
* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35
* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36
******************************************************************************
40
# include<sys/types.h>
45
/* Define _XOPEN_SOURCE for Solaris and friends. */
46
/* NetBSD needs it to be >= 4 */
48
#define _XOPEN_SOURCE 4
51
/* Define __USE_POSIX and __USE_XOPEN for Linux and glibc. */
61
/* Include standard headers. */
70
/* include ICU headers */
71
#include "unicode/utypes.h"
72
#include "unicode/putil.h"
79
/* include system headers */
81
# define WIN32_LEAN_AND_MEAN
90
# define INCL_DOSERRORS
91
# define INCL_DOSMODULEMGR
95
# include <qusec.h> /* error code structure */
96
# include <qusrjobi.h>
97
# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
100
# include <IntlResources.h>
102
# include <Folders.h>
103
# include <MacTypes.h>
104
# include <TextUtils.h>
107
# include <sys/ldr.h>
109
#elif defined(U_SOLARIS) || defined(U_LINUX)
120
/* Define the extension for data files, again... */
121
#define DATA_TYPE "dat"
123
/* floating point implementations ------------------------------------------- */
125
/* We return QNAN rather than SNAN*/
127
#define NAN_TOP ((int16_t)0x7FF8)
128
#define INF_TOP ((int16_t)0x7FF0)
130
#define NAN_TOP ((int16_t)0x7F08)
131
#define INF_TOP ((int16_t)0x3F00)
134
#define SIGN 0x80000000U
137
static UBool fgNaNInitialized = FALSE;
139
static UBool fgInfInitialized = FALSE;
143
static char* u_topNBytesOfDouble(double* d, int n);
144
static char* u_bottomNBytesOfDouble(double* d, int n);
145
/*static void uprv_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo);*/
148
/*---------------------------------------------------------------------------
150
Our general strategy is to assume we're on a POSIX platform. Platforms which
151
are non-POSIX must declare themselves so. The default POSIX implementation
152
will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
154
---------------------------------------------------------------------------*/
156
#if defined(_WIN32) || defined(XP_MAC) || defined(OS400) || defined(OS2)
157
# undef U_POSIX_LOCALE
159
# define U_POSIX_LOCALE 1
163
* Only include langinfo.h if we have a way to get the codeset. If we later
164
* depend on more features, we can test on U_HAVE_NL_LANGINFO.
168
#if U_HAVE_NL_LANGINFO_CODESET
169
#include <langinfo.h>
172
/*---------------------------------------------------------------------------
173
Universal Implementations
174
These are designed to work on all platforms. Try these, and if they don't
175
work on your platform, then special case your platform with new
177
---------------------------------------------------------------------------*/
179
/* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.*/
180
U_CAPI int32_t U_EXPORT2
187
memset( &tmrec, 0, sizeof(tmrec) );
191
t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
194
memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
195
t2 = mktime(&tmrec); /* seconds of current GMT*/
196
return t2 - t1; /* GMT (or UTC) in seconds since 1970*/
204
/*-----------------------------------------------------------------------------
206
These methods detect and return NaN and infinity values for doubles
207
conforming to IEEE 754. Platforms which support this standard include X86,
208
Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
209
If this doesn't work on your platform, you have non-IEEE floating-point, and
210
will need to code your own versions. A naive implementation is to return 0.0
211
for getNaN and getInfinity, and false for isNaN and isInfinite.
212
---------------------------------------------------------------------------*/
214
U_CAPI UBool U_EXPORT2
215
uprv_isNaN(double number)
218
/* This should work in theory, but it doesn't, so we resort to the more*/
219
/* complicated method below.*/
220
/* return number != number;*/
222
/* You can't return number == getNaN() because, by definition, NaN != x for*/
223
/* all x, including NaN (that is, NaN != NaN). So instead, we compare*/
224
/* against the known bit pattern. We must be careful of endianism here.*/
225
/* The pattern we are looking for is:*/
227
/* 7FFy yyyy yyyy yyyy (some y non-zero)*/
229
/* There are two different kinds of NaN, but we ignore the distinction*/
230
/* here. Note that the y value must be non-zero; if it is zero, then we*/
233
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
235
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
238
return (UBool)(((highBits & 0x7FF00000L) == 0x7FF00000L) &&
239
(((highBits & 0x000FFFFFL) != 0) || (lowBits != 0)));
242
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
244
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
247
return ((highBits & 0x7F080000L) == 0x7F080000L) &&
248
(lowBits == 0x00000000L);
251
/* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
252
/* you'll need to replace this default implementation with what's correct*/
253
/* for your platform.*/
254
return number != number;
258
U_CAPI UBool U_EXPORT2
259
uprv_isInfinite(double number)
262
/* We know the top bit is the sign bit, so we mask that off in a copy of */
263
/* the number and compare against infinity. [LIU]*/
264
/* The following approach doesn't work for some reason, so we go ahead and */
265
/* scrutinize the pattern itself. */
266
/* double a = number; */
267
/* *(int8_t*)u_topNBytesOfDouble(&a, 1) &= 0x7F;*/
268
/* return a == uprv_getInfinity();*/
269
/* Instead, We want to see either:*/
271
/* 7FF0 0000 0000 0000*/
272
/* FFF0 0000 0000 0000*/
274
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
276
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
279
return (UBool)(((highBits & ~SIGN) == 0x7FF00000U) &&
280
(lowBits == 0x00000000U));
283
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
285
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
288
return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
291
/* If your platform doesn't support IEEE 754 but *does* have an infinity*/
292
/* value, you'll need to replace this default implementation with what's*/
293
/* correct for your platform.*/
294
return number == (2.0 * number);
298
U_CAPI UBool U_EXPORT2
299
uprv_isPositiveInfinity(double number)
301
#if IEEE_754 || defined(OS390)
302
return (UBool)(number > 0 && uprv_isInfinite(number));
304
return uprv_isInfinite(number);
308
U_CAPI UBool U_EXPORT2
309
uprv_isNegativeInfinity(double number)
311
#if IEEE_754 || defined(OS390)
312
return (UBool)(number < 0 && uprv_isInfinite(number));
315
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
317
return((highBits & SIGN) && uprv_isInfinite(number));
322
U_CAPI double U_EXPORT2
325
#if IEEE_754 || defined(OS390)
326
if( !fgNaNInitialized) {
328
if( ! fgNaNInitialized) {
330
int8_t* p = (int8_t*)&fgNan;
331
for(i = 0; i < sizeof(double); ++i)
333
*(int16_t*)u_topNBytesOfDouble(&fgNan, sizeof(NAN_TOP)) = NAN_TOP;
334
fgNaNInitialized = TRUE;
340
/* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
341
/* you'll need to replace this default implementation with what's correct*/
342
/* for your platform.*/
347
U_CAPI double U_EXPORT2
350
#if IEEE_754 || defined(OS390)
351
if (!fgInfInitialized)
354
int8_t* p = (int8_t*)&fgInf;
355
for(i = 0; i < sizeof(double); ++i)
357
*(int16_t*)u_topNBytesOfDouble(&fgInf, sizeof(INF_TOP)) = INF_TOP;
358
fgInfInitialized = TRUE;
362
/* If your platform doesn't support IEEE 754 but *does* have an infinity*/
363
/* value, you'll need to replace this default implementation with what's*/
364
/* correct for your platform.*/
369
U_CAPI double U_EXPORT2
375
U_CAPI double U_EXPORT2
381
U_CAPI double U_EXPORT2
384
return uprv_floor(x + 0.5);
387
U_CAPI double U_EXPORT2
393
U_CAPI double U_EXPORT2
394
uprv_modf(double x, double* y)
399
U_CAPI double U_EXPORT2
400
uprv_fmod(double x, double y)
405
U_CAPI double U_EXPORT2
406
uprv_pow(double x, double y)
408
/* This is declared as "double pow(double x, double y)" */
412
U_CAPI double U_EXPORT2
413
uprv_pow10(int32_t x)
415
return pow(10.0, (double)x);
418
U_CAPI double U_EXPORT2
419
uprv_fmax(double x, double y)
424
/* first handle NaN*/
425
if(uprv_isNaN(x) || uprv_isNaN(y))
426
return uprv_getNaN();
428
/* check for -0 and 0*/
429
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
430
if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
435
/* this should work for all flt point w/o NaN and Inf special cases */
436
return (x > y ? x : y);
439
U_CAPI int32_t U_EXPORT2
440
uprv_max(int32_t x, int32_t y)
442
return (x > y ? x : y);
445
U_CAPI double U_EXPORT2
446
uprv_fmin(double x, double y)
451
/* first handle NaN*/
452
if(uprv_isNaN(x) || uprv_isNaN(y))
453
return uprv_getNaN();
455
/* check for -0 and 0*/
456
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
457
if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
462
/* this should work for all flt point w/o NaN and Inf special cases */
463
return (x > y ? y : x);
466
U_CAPI int32_t U_EXPORT2
467
uprv_min(int32_t x, int32_t y)
469
return (x > y ? y : x);
473
* Truncates the given double.
474
* trunc(3.3) = 3.0, trunc (-3.3) = -3.0
475
* This is different than calling floor() or ceil():
476
* floor(3.3) = 3, floor(-3.3) = -4
477
* ceil(3.3) = 4, ceil(-3.3) = -3
479
U_CAPI double U_EXPORT2
485
/* handle error cases*/
487
return uprv_getNaN();
488
if(uprv_isInfinite(d))
489
return uprv_getInfinity();
491
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
492
if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
498
return d >= 0 ? floor(d) : ceil(d);
505
uprv_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo)
507
*hi = *(int32_t*)u_topNBytesOfDouble(&d, sizeof(int32_t));
508
*lo = *(uint32_t*)u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
513
* Return the largest positive number that can be represented by an integer
514
* type of arbitrary bit length.
516
U_CAPI double U_EXPORT2
517
uprv_maxMantissa(void)
519
return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
523
* Return the floor of the log base 10 of a given double.
524
* This method compensates for inaccuracies which arise naturally when
525
* computing logs, and always give the correct value. The parameter
526
* must be positive and finite.
527
* (Thanks to Alan Liu for supplying this function.)
529
U_CAPI int16_t U_EXPORT2
533
/* We don't use the normal implementation because you can't underflow */
534
/* a double otherwise an underflow exception occurs */
537
/* The reason this routine is needed is that simply taking the*/
538
/* log and dividing by log10 yields a result which may be off*/
539
/* by 1 due to rounding errors. For example, the naive log10*/
540
/* of 1.0e300 taken this way is 299, rather than 300.*/
541
double alog10 = log(d) / log(10.0);
542
int16_t ailog10 = (int16_t) floor(alog10);
544
/* Positive logs could be too small, e.g. 0.99 instead of 1.0*/
545
if (alog10 > 0 && d >= pow(10.0, (double)(ailog10 + 1)))
548
/* Negative logs could be too big, e.g. -0.99 instead of -1.0*/
549
else if (alog10 < 0 && d < pow(10.0, (double)(ailog10)))
556
U_CAPI double U_EXPORT2
562
U_CAPI int32_t U_EXPORT2
563
uprv_digitsAfterDecimal(double x)
566
int32_t numDigits, bytesWritten;
568
int32_t ptPos, exponent;
570
/* cheat and use the string-format routine to get a string representation*/
571
/* (it handles mathematical inaccuracy better than we can), then find out */
572
/* how many characters are to the right of the decimal point */
573
bytesWritten = sprintf(buffer, "%+.9g", x);
574
while (isdigit(*(++p))) {
577
ptPos = (int32_t)(p - buffer);
578
numDigits = (int32_t)(bytesWritten - ptPos - 1);
580
/* if the number's string representation is in scientific notation, find */
581
/* the exponent and take it into account*/
583
p = uprv_strchr(buffer, 'e');
585
int16_t expPos = (int16_t)(p - buffer);
586
numDigits -= bytesWritten - expPos;
587
exponent = (int32_t)(atol(p + 1));
590
/* the string representation may still have spurious decimal digits in it, */
591
/* so we cut off at the ninth digit to the right of the decimal, and have */
592
/* to search backward from there to the first non-zero digit*/
595
while (numDigits > 0 && buffer[ptPos + numDigits] == '0')
598
numDigits -= exponent;
605
U_CAPI double U_EXPORT2
606
uprv_nextDouble(double d, UBool next)
611
int32_t highMagnitude;
612
uint32_t lowMagnitude;
614
uint32_t *highResult, *lowResult;
617
/* filter out NaN's */
622
/* zero's are also a special case */
624
double smallestPositiveDouble = 0.0;
626
(uint32_t *)u_bottomNBytesOfDouble(&smallestPositiveDouble,
631
/* Don't get an underflow exception */
632
*(plowBits-1) = 0x00100000;
636
return smallestPositiveDouble;
638
return -smallestPositiveDouble;
642
/* if we get here, d is a nonzero value */
644
/* hold all bits for later use */
645
highBits = *(int32_t*)u_topNBytesOfDouble(&d, sizeof(uint32_t));
646
lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
648
/* strip off the sign bit */
649
highMagnitude = highBits & ~SIGN;
650
lowMagnitude = lowBits;
652
/* if next double away from zero, increase magnitude */
653
if ((highBits >= 0) == next) {
654
if (highMagnitude != 0x7FF00000L || lowMagnitude != 0x00000000L) {
656
if (lowMagnitude == 0) {
661
/* else decrease magnitude */
664
if (lowMagnitude > lowBits) {
668
/* Don't get an underflow exception */
669
if (highMagnitude < 0x00100000 ||
670
(highMagnitude == 0x00100000 && lowMagnitude == 0))
678
/* construct result and return */
679
signBit = highBits & SIGN;
680
highResult = (uint32_t *)u_topNBytesOfDouble(&result, sizeof(uint32_t));
681
lowResult = (uint32_t *)u_bottomNBytesOfDouble(&result, sizeof(uint32_t));
683
*highResult = signBit | highMagnitude;
684
*lowResult = lowMagnitude;
688
/* This is the portable implementation...*/
689
/* a small coefficient within the precision of the mantissa*/
690
static const double smallValue = 1e-10;
691
double epsilon = ((d<0)?-d:d) * smallValue; /* first approximation*/
692
double last_eps, sum;
695
epsilon = smallValue; /* for very small d's*/
698
/* avoid higher precision possibly used for temporary values*/
700
last_eps = epsilon * 2.0;
703
while ((sum != d) && (epsilon != last_eps)) {
713
u_topNBytesOfDouble(double* d, int n)
718
return (char*)(d + 1) - n;
722
static char* u_bottomNBytesOfDouble(double* d, int n)
725
return (char*)(d + 1) - n;
731
/*---------------------------------------------------------------------------
732
Platform-specific Implementations
733
Try these, and if they don't work on your platform, then special case your
734
platform with new implementations.
735
---------------------------------------------------------------------------*/
737
/* Time zone utilities */
738
U_CAPI void U_EXPORT2
744
/* no initialization*/
748
U_CAPI int32_t U_EXPORT2
760
memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
761
dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
762
t1 = mktime(&tmrec); /* local time in seconds*/
763
memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
764
t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
766
/* imitate NT behaviour, which returns same timezone offset to GMT for
774
/* Note that U_TZNAME does *not* have to be tzname, but if it does,
775
some platforms need to have it declared here. */
777
#if defined(IRIX) || defined(U_DARWIN) /* For SGI/MacOSX. */
778
extern char *tzname[]; /* RS6000 and others reject char **tzname. */
781
U_CAPI char* U_EXPORT2
791
/* Get and set the ICU data directory --------------------------------------- */
793
static char *gDataDirectory = NULL;
795
static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
798
UBool putil_cleanup(void)
800
if (gDataDirectory) {
801
uprv_free(gDataDirectory);
802
gDataDirectory = NULL;
805
if (gCorrectedPOSIXLocale) {
806
uprv_free(gCorrectedPOSIXLocale);
807
gCorrectedPOSIXLocale = NULL;
814
* Set the data directory.
815
* Make a copy of the passed string, and set the global data dir to point to it.
817
U_CAPI void U_EXPORT2
818
u_setDataDirectory(const char *directory) {
821
if(directory!=NULL) {
822
int length=uprv_strlen(directory);
823
newDataDir = (char *)uprv_malloc(length + 2);
824
uprv_strcpy(newDataDir, directory);
825
if(newDataDir[length-1]!=U_FILE_SEP_CHAR) {
826
newDataDir[length++]=U_FILE_SEP_CHAR;
827
newDataDir[length] = 0;
831
if (gDataDirectory) {
832
uprv_free(gDataDirectory);
834
gDataDirectory = newDataDir;
839
U_CAPI const char * U_EXPORT2
840
u_getDataDirectory(void) {
841
const char *path = NULL;
842
char pathBuffer[1024];
844
/* if we have the directory, then return it immediately */
846
return gDataDirectory;
849
/* we need to look for it */
850
pathBuffer[0] = 0; /* Shuts up compiler warnings about unreferenced */
851
/* variables when the code using it is ifdefed out */
852
# if !defined(XP_MAC)
853
/* first try to get the environment variable */
854
path=getenv("ICU_DATA");
868
myErr = HGetVol(xpath, &volNum, &dir);
871
myErr = FindFolder(volNum, kApplicationSupportFolderType, TRUE, &vRef, &dir);
873
if (myErr == noErr) {
874
myErr = DirCreate(volNum,
878
if( (myErr == noErr) || (myErr == dupFNErr) ) {
879
spec.vRefNum = volNum;
881
uprv_memcpy(spec.name, "\pICU", 4);
883
myErr = FSpGetFullPath(&spec, &len, &full);
887
uprv_memcpy(pathBuffer, ((char*)(*full)), len);
899
# if defined WIN32 && defined ICU_ENABLE_DEPRECATED_WIN_REGISTRY
900
/* next, try to read the path from the registry */
901
if(path==NULL || *path==0) {
904
if(ERROR_SUCCESS==RegOpenKeyEx(HKEY_LOCAL_MACHINE, "SOFTWARE\\ICU\\Unicode\\Data", 0, KEY_QUERY_VALUE, &key)) {
905
DWORD type=REG_EXPAND_SZ, size=sizeof(pathBuffer);
907
if(ERROR_SUCCESS==RegQueryValueEx(key, "Path", NULL, &type, (unsigned char *)pathBuffer, &size) && size>1) {
908
if(type==REG_EXPAND_SZ) {
909
/* replace environment variable references by their values */
910
char temporaryPath[1024];
912
/* copy the path with variables to the temporary one */
913
uprv_memcpy(temporaryPath, pathBuffer, size);
915
/* do the replacement and store it in the pathBuffer */
916
size=ExpandEnvironmentStrings(temporaryPath, pathBuffer, sizeof(pathBuffer));
917
if(size>0 && size<sizeof(pathBuffer)) {
920
} else if(type==REG_SZ) {
929
/* ICU_DATA_DIR may be set as a compile option */
931
if(path==NULL || *path==0) {
937
/* It looks really bad, set it to something. */
941
u_setDataDirectory(path);
942
return gDataDirectory;
949
/* Macintosh-specific locale information ------------------------------------ */
960
/* Todo: This will be updated with a newer version from www.unicode.org web
961
page when it's available.*/
962
#define MAC_LC_MAGIC_NUMBER -5
963
#define MAC_LC_INIT_NUMBER -9
965
static const mac_lc_rec mac_lc_recs[] = {
966
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
968
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
970
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
972
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
974
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
976
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
978
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
979
/* French for Belgium or Luxembourg*/
980
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
982
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
984
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
986
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
988
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
990
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
992
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
994
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
995
/* the Arabic world (?)*/
996
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
998
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
999
/* French for Switzerland*/
1000
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
1001
/* German for Switzerland*/
1002
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
1004
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
1006
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
1008
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
1010
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
1012
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
1013
/* Croatian system for Yugoslavia*/
1014
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
1015
/* Hindi system for India*/
1016
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
1018
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
1020
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
1022
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
1024
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
1026
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
1028
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
1029
/* Lapland [Ask Rich for the data. HS]*/
1030
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
1032
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
1034
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
1036
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
1038
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
1040
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
1041
/* People's Republic of China*/
1042
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
1044
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
1047
/* fallback is en_US*/
1048
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
1049
MAC_LC_MAGIC_NUMBER, "en_US"
1055
/* Return just the POSIX id, whatever happens to be in it */
1056
static const char *uprv_getPOSIXID(void)
1058
static const char* posixID = NULL;
1060
posixID = getenv("LC_ALL");
1062
posixID = getenv("LANG");
1065
* On Solaris two different calls to setlocale can result in
1066
* different values. Only get this value once.
1068
posixID = setlocale(LC_ALL, NULL);
1075
/* Nothing worked. Give it a nice value. */
1078
else if ((uprv_strcmp("C", posixID) == 0)
1079
|| (uprv_strchr(posixID, ' ') != NULL)
1080
|| (uprv_strchr(posixID, '/') != NULL))
1081
{ /* HPUX returns 'C C C C C C C' */
1082
/* Solaris can return /en_US/C/C/C/C/C on the second try. */
1083
/* Maybe we got some garbage. Give it a nice value. */
1084
posixID = "en_US_POSIX";
1090
U_CAPI const char* U_EXPORT2
1091
uprv_getDefaultLocaleID()
1095
Note that: (a '!' means the ID is improper somehow)
1096
LC_ALL ----> default_loc codepage
1097
--------------------------------------------------------
1102
ab_CD.EF@GH ab_CD_GH EF
1104
Some 'improper' ways to do the same as above:
1105
! ab_CD@GH.EF ab_CD_GH EF
1106
! ab_CD.EF@GH.IJ ab_CD_GH EF
1107
! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1112
The variant cannot have dots in it.
1113
The 'rightmost' variant (@xxx) wins.
1114
The leftmost codepage (.xxx) wins.
1116
char *correctedPOSIXLocale = 0;
1117
const char* posixID = uprv_getPOSIXID();
1122
/* Format: (no spaces)
1123
ll [ _CC ] [ . MM ] [ @ VV]
1125
l = lang, C = ctry, M = charmap, V = variant
1128
if(gCorrectedPOSIXLocale != NULL) {
1129
return gCorrectedPOSIXLocale;
1132
if((p = uprv_strchr(posixID, '.')) != NULL)
1134
/* assume new locale can't be larger than old one? */
1135
correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
1136
uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1137
correctedPOSIXLocale[p-posixID] = 0;
1139
/* do not copy after the @ */
1140
if((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL)
1142
correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1146
/* Note that we scan the *uncorrected* ID. */
1147
if((p = uprv_strrchr(posixID, '@')) != NULL)
1149
if(correctedPOSIXLocale == NULL) {
1150
correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID));
1151
uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1152
correctedPOSIXLocale[p-posixID] = 0;
1156
/* Take care of any special cases here.. */
1157
if(!uprv_strcmp(p, "nynorsk"))
1161
/* Should we assume no_NO_NY instead of possible no__NY?
1162
* if(!uprv_strcmp(correctedPOSIXLocale, "no")) {
1163
* uprv_strcpy(correctedPOSIXLocale, "no_NO");
1168
if(uprv_strchr(correctedPOSIXLocale,'_') == NULL)
1170
uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1174
uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1177
if((q = uprv_strchr(p, '.')) != NULL)
1179
/* How big will the resulting string be? */
1180
len = uprv_strlen(correctedPOSIXLocale) + (q-p);
1181
uprv_strncat(correctedPOSIXLocale, p, q-p);
1182
correctedPOSIXLocale[len] = 0;
1186
uprv_strcat(correctedPOSIXLocale, p); /* Anything following the @ sign */
1189
/* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1190
How about 'russian' -> 'ru'?
1194
/* Was a correction made? */
1195
if(correctedPOSIXLocale != NULL)
1197
posixID = correctedPOSIXLocale;
1201
if(gCorrectedPOSIXLocale == NULL) {
1202
gCorrectedPOSIXLocale = correctedPOSIXLocale;
1203
correctedPOSIXLocale = NULL;
1207
if(correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1208
uprv_free(correctedPOSIXLocale);
1213
#elif defined(WIN32)
1214
UErrorCode status = U_ZERO_ERROR;
1215
LCID id = GetThreadLocale();
1216
const char* locID = T_convertToPosix(id, &status);
1218
if (U_FAILURE(status)) {
1223
#elif defined(XP_MAC)
1224
int32_t script = MAC_LC_INIT_NUMBER;
1225
/* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1226
int32_t region = MAC_LC_INIT_NUMBER;
1227
/* = GetScriptManagerVariable(smRegionCode);*/
1228
int32_t lang = MAC_LC_INIT_NUMBER;
1229
/* = GetScriptManagerVariable(smScriptLang);*/
1230
int32_t date_region = MAC_LC_INIT_NUMBER;
1232
int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1236
ih = (Intl1Hndl) GetIntlResource(1);
1238
date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1240
for (i = 0; i < count; i++) {
1241
if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1242
|| (mac_lc_recs[i].script == script))
1243
&& ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1244
|| (mac_lc_recs[i].region == region))
1245
&& ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1246
|| (mac_lc_recs[i].lang == lang))
1247
&& ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1248
|| (mac_lc_recs[i].date_region == date_region))
1251
posixID = mac_lc_recs[i].posixID;
1261
locID = getenv("LC_ALL");
1262
if (!locID || !*locID)
1263
locID = getenv("LANG");
1264
if (!locID || !*locID) {
1267
if (!stricmp(locID, "c") || !stricmp(locID, "posix") ||
1268
!stricmp(locID, "univ"))
1269
locID = "en_US_POSIX";
1272
#elif defined(OS400)
1273
/* locales are process scoped and are by definition thread safe */
1274
static char correctedLocale[64];
1275
const char *localeID = getenv("LC_ALL");
1278
if (localeID == NULL)
1279
localeID = getenv("LANG");
1280
if (localeID == NULL)
1281
localeID = setlocale(LC_ALL, NULL);
1282
/* Make sure we have something... */
1283
if (localeID == NULL)
1284
return "en_US_POSIX";
1286
/* Extract the locale name from the path. */
1287
if((p = uprv_strrchr(localeID, '/')) != NULL)
1289
/* Increment p to start of locale name. */
1294
/* Copy to work location. */
1295
uprv_strcpy(correctedLocale, localeID);
1297
/* Strip off the '.locale' extension. */
1298
if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1302
/* Upper case the locale name. */
1303
T_CString_toUpperCase(correctedLocale);
1305
/* See if we are using the POSIX locale. Any of the
1306
* following are equivalent and use the same QLGPGCMA
1309
if ((uprv_strcmp("C", correctedLocale) == 0) ||
1310
(uprv_strcmp("POSIX", correctedLocale) == 0) ||
1311
(uprv_strcmp("QLGPGCMA", correctedLocale) == 0))
1313
uprv_strcpy(correctedLocale, "en_US_POSIX");
1319
/* Lower case the lang portion. */
1320
for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1322
*p = uprv_tolower(*p);
1325
/* Adjust for Euro. After '_E' add 'URO'. */
1326
LocaleLen = uprv_strlen(correctedLocale);
1327
if (correctedLocale[LocaleLen - 2] == '_' &&
1328
correctedLocale[LocaleLen - 1] == 'E')
1330
uprv_strcat(correctedLocale, "URO");
1333
/* If using Lotus-based locale then convert to
1334
* equivalent non Lotus.
1336
else if (correctedLocale[LocaleLen - 2] == '_' &&
1337
correctedLocale[LocaleLen - 1] == 'L')
1339
correctedLocale[LocaleLen - 2] = 0;
1342
/* There are separate simplified and traditional
1343
* locales called zh_HK_S and zh_HK_T.
1345
else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1347
uprv_strcpy(correctedLocale, "zh_HK");
1350
/* A special zh_CN_GBK locale...
1352
else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1354
uprv_strcpy(correctedLocale, "zh_CN");
1359
return correctedLocale;
1364
/*
 * uprv_getDefaultCodepage: declared to return a C string naming the
 * platform's default codepage.  The name is built differently in each
 * preprocessor branch visible below:
 *
 *  - leading branch (its opening #if is not visible in this block; it uses
 *    the QUSRJOBI job-information call): starts from CCSID 37, replaces it
 *    with the job's Coded_Char_Set_ID or, failing that, its
 *    Default_Coded_Char_Set_Id when the call reports no error and the value
 *    is not 0xFFFF, then formats "ibm-<ccsid>" into a static buffer.
 *  - OS390:  formats "<nl_langinfo(CODESET)>-s390" into a static buffer.
 *  - XP_MAC: returns the fixed string "ibm-1275".
 *  - WIN32:  formats "cp<GetACP()>" into a static buffer.
 *  - U_POSIX_LOCALE: works in a static 100-byte buffer.  It copies the
 *    uprv_getPOSIXID() value and looks for a '.' (and then an '@') in it,
 *    repeats the same search on setlocale(LC_CTYPE, ""), then consults
 *    nl_langinfo() when U_HAVE_NL_LANGINFO_CODESET is set, then
 *    uprv_defaultCodePageForLocale(), and finally stores "US-ASCII".
 *
 * The formatted names live in static buffers, so the text is overwritten by
 * a later call that takes the same branch.
 *
 * NOTE(review): as shown here the block has stray numeric lines between
 * statements, no braces, no declarations for `name` and `euro`, an
 * unterminated EPT_CALL argument list, and no return statement in most
 * branches.  Lines appear to be missing -- confirm against a complete copy
 * of the file before relying on the control flow described above.
 */
U_CAPI const char* U_EXPORT2
1365
uprv_getDefaultCodepage()
1368
uint32_t ccsid = 37; /* Default to ibm-37 */
1369
static char codepage[16];
1370
Qwc_JOBI0400_t jobinfo;
1371
Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
1373
EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
1376
if (error.Bytes_Available == 0) {
1377
if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
1378
ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
1380
else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
1381
ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
1383
/* else use the default */
1385
sprintf(codepage,"ibm-%d", ccsid);
1388
#elif defined(OS390)
1389
static char codepage[16];
1390
sprintf(codepage,"%s-s390", nl_langinfo(CODESET));
1393
#elif defined(XP_MAC)
1394
return "ibm-1275"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
1396
#elif defined(WIN32)
1397
static char codepage[16];
1398
sprintf(codepage, "cp%d", GetACP());
1401
#elif U_POSIX_LOCALE
1402
static char codesetName[100];
1405
const char *localeName = NULL;
1406
const char *defaultTable = NULL;
1408
uprv_memset(codesetName, 0, 100);
1409
localeName = uprv_getPOSIXID();
1410
if (localeName != NULL)
1412
uprv_strcpy(codesetName, localeName);
1413
if ((name = (uprv_strchr(codesetName, (int) '.'))) != NULL)
1415
/* strip the locale name and look at the suffix only */
1417
if ((euro = (uprv_strchr(name, (int)'@'))) != NULL)
1421
/* if we can find the codeset name from setlocale, return that. */
1422
if (uprv_strlen(name) != 0)
1429
/* otherwise, try CTYPE */
1431
uprv_memset(codesetName, 0, 100);
1432
localeName = setlocale(LC_CTYPE, "");
1433
if (localeName != NULL)
1435
uprv_strcpy(codesetName, localeName);
1436
if ((name = (uprv_strchr(codesetName, (int) '.'))) != NULL)
1438
/* strip the locale name and look at the suffix only */
1440
if ((euro = (uprv_strchr(name, (int)'@'))) != NULL)
1444
/* if we can find the codeset name from setlocale, return that. */
1445
if (uprv_strlen(name) != 0)
1451
/* discard whatever the locale-name attempts left in the buffer */
if (strlen(codesetName) != 0)
1453
uprv_memset(codesetName, 0, 100);
1455
#if U_HAVE_NL_LANGINFO_CODESET
1457
const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
1458
if (codeset != NULL) {
1459
uprv_strcpy(codesetName, codeset);
1463
/* still nothing: fall back to the per-locale default table */
if (uprv_strlen(codesetName) == 0)
1465
/* look up in srl's table */
1466
defaultTable = uprv_defaultCodePageForLocale(localeName);
1467
if (defaultTable != NULL)
1469
uprv_strcpy(codesetName, defaultTable);
1473
/* if the table lookup failed, return US ASCII (ISO 646). */
1474
uprv_strcpy(codesetName, "US-ASCII");
1483
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
1486
/*
 * These maps for ASCII to/from EBCDIC are from
 * "UTF-EBCDIC - EBCDIC-Friendly Unicode (or UCS) Transformation Format"
 * at http://www.unicode.org/unicode/reports/tr16/
 * (which should reflect codepage 1047)
 * but modified to explicitly exclude the variant
 * control and graphical characters that are in ASCII-based
 * codepages at 0x80 and above.
 * Also, unlike in Version 6.0 of the UTR on UTF-EBCDIC,
 * the Line Feed mapping varies according to the environment.
 *
 * These tables do not establish a converter or a codepage.
 */
1499
/* on S/390 Open Edition, ASCII 0xa (LF) maps to 0x15 and ISO-8 0x85 maps to 0x25 */
1505
/* the CDRA variation of 1047 is not currently used - see tables in #else below */
1506
/* in standard EBCDIC (CDRA), ASCII 0xa (LF) maps to 0x25 and ISO-8 0x85 maps to 0x15 */
1512
static const uint8_t asciiFromEbcdic[256]={
1513
0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7F, 0x00, 0x00, 0x00, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
1514
0x10, 0x11, 0x12, 0x13, 0x00, A_15, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1C, 0x1D, 0x1E, 0x1F,
1515
0x00, 0x00, 0x00, 0x00, 0x00, A_25, 0x17, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
1516
0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1A,
1517
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
1518
0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
1519
0x2D, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
1520
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
1521
0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1522
0x00, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1523
0x00, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00,
1524
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00,
1525
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1526
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1527
0x5C, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1528
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1531
static const uint8_t ebcdicFromAscii[256]={
1532
0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, E_LF, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
1533
0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
1534
0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
1535
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
1536
0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
1537
0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
1538
0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
1539
0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
1540
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1541
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1542
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1543
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1544
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1545
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1546
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1547
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1552
/*
 * These maps for ASCII to/from EBCDIC were generated
 * using the ICU converter for codepage 37 on 2000-may-22.
 * They explicitly exclude the variant
 * control and graphical characters that are in ASCII-based
 * codepages at 0x80 and above.
 *
 * These tables do not establish a converter or a codepage.
 */
1561
/*
 * EBCDIC -> ASCII lookup table (codepage-37 based mapping).
 * Indexed by an EBCDIC byte value; each row below holds 16 consecutive
 * entries.  A 0x00 entry (other than index 0) marks a byte with no mapping
 * in this table.
 */
static const uint8_t asciiFromEbcdic[256]={
    /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x00, 0x00, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 0x20 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    /* 0x30 */ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
    /* 0x40 */ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    /* 0x50 */ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x00,
    /* 0x60 */ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    /* 0x70 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
    /* 0x80 */ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
1580
/*
 * ASCII -> EBCDIC lookup table (codepage-37 based mapping).
 * Indexed by an ASCII byte value; each row below holds 16 consecutive
 * entries.  Only 0x00..0x7F carry mappings; every entry from 0x80 up is 0.
 */
static const uint8_t ebcdicFromAscii[256]={
    /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 0x20 */ 0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
    /* 0x30 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
    /* 0x40 */ 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    /* 0x50 */ 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xba, 0xe0, 0xbb, 0xb0, 0x6d,
    /* 0x60 */ 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    /* 0x70 */ 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0xa1, 0x07,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
1603
U_CAPI void U_EXPORT2
1604
u_charsToUChars(const char *cs, UChar *us, UTextOffset length) {
1606
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
1607
*us++=(UChar)(uint8_t)(*cs++);
1608
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
1609
*us++=(UChar)asciiFromEbcdic[(uint8_t)(*cs++)];
1611
# error U_CHARSET_FAMILY is not valid
1617
U_CAPI void U_EXPORT2
1618
u_UCharsToChars(const UChar *us, char *cs, UTextOffset length) {
1620
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
1621
*cs++=(char)(*us++);
1622
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
1623
*cs++=(char)ebcdicFromAscii[(uint8_t)(*us++)];
1625
# error U_CHARSET_FAMILY is not valid
1631
/* end of platform-specific implementation */
1633
U_CAPI void U_EXPORT2
1634
u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1638
if(versionArray==NULL) {
1642
if(versionString!=NULL) {
1644
versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1645
if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1648
versionString=end+1;
1652
while(part<U_MAX_VERSION_LENGTH) {
1653
versionArray[part++]=0;
1657
/*
 * u_versionToString: writes the fields of versionArray into versionString
 * as decimal numbers separated by U_VERSION_DELIMITER.
 *
 * The counting loop starts from 4 fields and drops zero-valued fields from
 * the end, so trailing zero fields are not counted.  Field 0 is written
 * first; each further counted field is preceded by the delimiter.  Digits
 * are produced by adding the hundreds, tens and ones values to '0'.
 *
 * NOTE(review): as shown here the block has stray numeric lines between
 * statements, `field` is used without a visible declaration, both NULL
 * checks have no visible bodies, the digit writes have no visible guards or
 * remainder steps, and no terminator write or closing braces are visible.
 * Lines appear to be missing -- confirm against a complete copy of the file
 * before relying on this description.
 */
U_CAPI void U_EXPORT2
1658
u_versionToString(UVersionInfo versionArray, char *versionString) {
1659
uint16_t count, part;
1662
if(versionString==NULL) {
1666
if(versionArray==NULL) {
1671
/* count how many fields need to be written */
1672
for(count=4; count>0 && versionArray[count-1]==0; --count) {
1679
/* write the first part */
1680
/* write the decimal field value */
1681
field=versionArray[0];
1683
*versionString++=(char)('0'+field/100);
1687
*versionString++=(char)('0'+field/10);
1690
*versionString++=(char)('0'+field);
1692
/* write the following parts */
1693
for(part=1; part<count; ++part) {
1694
/* write a dot first */
1695
*versionString++=U_VERSION_DELIMITER;
1697
/* write the decimal field value */
1698
field=versionArray[part];
1700
*versionString++=(char)('0'+field/100);
1704
*versionString++=(char)('0'+field/10);
1707
*versionString++=(char)('0'+field);
1714
U_CAPI void U_EXPORT2
1715
u_getVersion(UVersionInfo versionArray) {
1716
u_versionFromString(versionArray, U_ICU_VERSION);
1719
/* u_errorName() ------------------------------------------------------------ */
1721
static const char * const
1722
_uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={
1723
"U_USING_FALLBACK_WARNING",
1724
"U_USING_DEFAULT_WARNING",
1725
"U_SAFECLONE_ALLOCATED_WARNING",
1726
"U_STATE_OLD_WARNING",
1727
"U_STRING_NOT_TERMINATED_WARNING"
1730
static const char * const
1731
_uTransErrorName[U_PARSE_ERROR_LIMIT - U_PARSE_ERROR_START]={
1732
"U_BAD_VARIABLE_DEFINITION",
1735
"U_MALFORMED_SYMBOL_REFERENCE",
1736
"U_MALFORMED_UNICODE_ESCAPE",
1737
"U_MALFORMED_VARIABLE_DEFINITION",
1738
"U_MALFORMED_VARIABLE_REFERENCE",
1739
"U_MISMATCHED_SEGMENT_DELIMITERS",
1740
"U_MISPLACED_ANCHOR_START",
1741
"U_MISPLACED_CURSOR_OFFSET",
1742
"U_MISPLACED_QUANTIFIER",
1743
"U_MISSING_OPERATOR",
1744
"U_MISSING_SEGMENT_CLOSE",
1745
"U_MULTIPLE_ANTE_CONTEXTS",
1746
"U_MULTIPLE_CURSORS",
1747
"U_MULTIPLE_POST_CONTEXTS",
1748
"U_TRAILING_BACKSLASH",
1749
"U_UNDEFINED_SEGMENT_REFERENCE",
1750
"U_UNDEFINED_VARIABLE",
1751
"U_UNQUOTED_SPECIAL",
1752
"U_UNTERMINATED_QUOTE",
1753
"U_RULE_MASK_ERROR",
1754
"U_MISPLACED_COMPOUND_FILTER",
1755
"U_MULTIPLE_COMPOUND_FILTERS",
1756
"U_INVALID_RBT_SYNTAX",
1757
"U_INVALID_PROPERTY_PATTERN",
1758
"U_MALFORMED_PRAGMA",
1759
"U_UNCLOSED_SEGMENT",
1760
"U_ILLEGAL_CHAR_IN_SEGMENT",
1761
"U_VARIABLE_RANGE_EXHAUSTED",
1762
"U_VARIABLE_RANGE_OVERLAP",
1763
"U_ILLEGAL_CHARACTER",
1764
"U_INTERNAL_TRANSLITERATOR_ERROR",
1766
"U_INVALID_FUNCTION"
1769
static const char * const
1770
_uErrorName[U_STANDARD_ERROR_LIMIT]={
1773
"U_ILLEGAL_ARGUMENT_ERROR",
1774
"U_MISSING_RESOURCE_ERROR",
1775
"U_INVALID_FORMAT_ERROR",
1776
"U_FILE_ACCESS_ERROR",
1777
"U_INTERNAL_PROGRAM_ERROR",
1778
"U_MESSAGE_PARSE_ERROR",
1779
"U_MEMORY_ALLOCATION_ERROR",
1780
"U_INDEX_OUTOFBOUNDS_ERROR",
1782
"U_INVALID_CHAR_FOUND",
1783
"U_TRUNCATED_CHAR_FOUND",
1784
"U_ILLEGAL_CHAR_FOUND",
1785
"U_INVALID_TABLE_FORMAT",
1786
"U_INVALID_TABLE_FILE",
1787
"U_BUFFER_OVERFLOW_ERROR",
1788
"U_UNSUPPORTED_ERROR",
1789
"U_RESOURCE_TYPE_MISMATCH",
1790
"U_ILLEGAL_ESCAPE_SEQUENCE",
1791
"U_UNSUPPORTED_ESCAPE_SEQUENCE",
1792
"U_NO_SPACE_AVAILABLE",
1793
"U_CE_NOT_FOUND_ERROR",
1794
"U_PRIMARY_TOO_LONG_ERROR",
1795
"U_STATE_TOO_OLD_ERROR"
1797
static const char * const
1798
_uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
1799
"U_UNEXPECTED_TOKEN",
1800
"U_MULTIPLE_DECIMAL_SEPERATORS",
1801
"U_MULTIPLE_EXPONENTIAL_SYMBOLS",
1802
"U_MALFORMED_EXPONENTIAL_PATTERN",
1803
"U_MULTIPLE_PERCENT_SYMBOLS",
1804
"U_MULTIPLE_PERMILL_SYMBOLS",
1805
"U_MULTIPLE_PAD_SPECIFIERS",
1806
"U_PATTERN_SYNTAX_ERROR",
1807
"U_ILLEGAL_PAD_POSITION",
1808
"U_UNMATCHED_BRACES",
1809
"U_UNSUPPORTED_PROPERTY",
1810
"U_UNSUPPORTED_ATTRIBUTE"
1813
U_CAPI const char * U_EXPORT2
1814
u_errorName(UErrorCode code) {
1815
if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) {
1816
return _uErrorName[code];
1817
} else if(U_ERROR_WARNING_START <= code && code < U_ERROR_WARNING_LIMIT) {
1818
return _uErrorInfoName[code - U_ERROR_WARNING_START];
1819
} else if(U_PARSE_ERROR_START <= code && code < U_PARSE_ERROR_LIMIT){
1820
return _uTransErrorName[code - U_PARSE_ERROR_START];
1821
} else if(U_FMT_PARSE_ERROR_START <= code && code < U_FMT_PARSE_ERROR_LIMIT){
1822
return _uFmtErrorName[code - U_FMT_PARSE_ERROR_START];
1824
return "[BOGUS UErrorCode]";
1829
* Hey, Emacs, please set the following:
1832
* indent-tabs-mode: nil