1
/*********************************************************
2
* Copyright (C) 2008 VMware, Inc. All rights reserved.
4
* This file is part of VMware View Open Client.
5
*********************************************************/
7
******************************************************************************
9
* Copyright (C) 1997-2007, International Business Machines
10
* Corporation and others. All Rights Reserved.
12
******************************************************************************
14
* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
16
* Date Name Description
17
* 04/14/97 aliu Creation.
18
* 04/24/97 aliu Added getDefaultDataDirectory() and
19
* getDefaultLocaleID().
20
* 04/28/97 aliu Rewritten to assume Unix and apply general methods
21
* for assumed case. Non-UNIX platforms must be
22
* special-cased. Rewrote numeric methods dealing
23
* with NaN and Infinity to be platform independent
24
* over all IEEE 754 platforms.
25
* 05/13/97 aliu Restored sign of timezone
26
* (semantics are hours West of GMT)
27
* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
29
* 07/22/98 stephen Added remainder, max, min, trunc
30
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
31
* 08/24/98 stephen Added longBitsFromDouble
32
* 09/08/98 stephen Minor changes for Mac Port
33
* 03/02/99 stephen Removed openFile(). Added AS400 support.
35
* 04/15/99 stephen Converted to C.
36
* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
37
* 08/04/99 jeffrey R. Added OS/2 changes
38
* 11/15/99 helena Integrated S/390 IEEE support.
39
* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
40
* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
41
******************************************************************************
45
* VMware change: disable some unused math functions to remove need
49
/* Define _XOPEN_SOURCE for Solaris and friends. */
50
/* NetBSD needs it to be >= 4 */
51
#if !defined(_XOPEN_SOURCE)
52
#if __STDC_VERSION__ >= 199901L
53
/* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
54
#define _XOPEN_SOURCE 600
56
#define _XOPEN_SOURCE 4
60
/* Make sure things like readlink and such functions work.
61
Poorly upgraded Solaris machines can't have this defined.
62
Cleanly installed Solaris can use this #define.
64
#if !defined(_XOPEN_SOURCE_EXTENDED) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L)
65
#define _XOPEN_SOURCE_EXTENDED 1
68
/* include ICU headers */
69
#include "unicode/utypes.h"
70
#include "unicode/putil.h"
71
#include "unicode/ustring.h"
80
/* Include standard headers. */
89
/* include system headers */
91
/* VMware change: comment out some lines below */
92
//# define WIN32_LEAN_AND_MEAN
93
//# define VC_EXTRALEAN
100
#elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
101
/* tzset isn't defined in strict ANSI on Cygwin. */
102
# undef __STRICT_ANSI__
105
# include <qusec.h> /* error code structure */
106
# include <qusrjobi.h>
107
# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
108
# include <mih/testptr.h> /* For uprv_maximumPtr */
109
#elif defined(XP_MAC)
111
# include <IntlResources.h>
113
# include <Folders.h>
114
# include <MacTypes.h>
115
# include <TextUtils.h>
116
# define ICU_NO_USER_DATA_OVERRIDE 1
118
#include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
119
#elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
123
#include <sys/neutrino.h>
127
#include <sys/time.h>
131
* Only include langinfo.h if we have a way to get the codeset. If we later
132
* depend on more features, we can test on U_HAVE_NL_LANGINFO.
135
#if U_HAVE_NL_LANGINFO_CODESET
136
#include <langinfo.h>
139
/* Define the extension for data files, again... */
140
#define DATA_TYPE "dat"
142
/* Leave this copyright notice here! */
143
static const char copyright[] = U_COPYRIGHT_STRING;
145
/* floating point implementations ------------------------------------------- */
147
/* We return QNAN rather than SNAN*/
148
#define SIGN 0x80000000U
150
/* Make it easy to define certain types of constants */
152
int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
154
} BitPatternConversion;
155
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
156
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
158
/*---------------------------------------------------------------------------
160
Our general strategy is to assume we're on a POSIX platform. Platforms which
161
are non-POSIX must declare themselves so. The default POSIX implementation
162
will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
164
---------------------------------------------------------------------------*/
166
#if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
167
# undef U_POSIX_LOCALE
169
# define U_POSIX_LOCALE 1
173
WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
174
can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
178
u_topNBytesOfDouble(double* d, int n)
183
return (char*)(d + 1) - n;
189
u_bottomNBytesOfDouble(double* d, int n)
192
return (char*)(d + 1) - n;
198
#if defined(U_WINDOWS)
202
} FileTimeConversion; /* This is like a ULARGE_INTEGER */
204
/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
205
#define EPOCH_BIAS INT64_C(116444736000000000)
206
#define HECTONANOSECOND_PER_MILLISECOND 10000
210
/*---------------------------------------------------------------------------
211
Universal Implementations
212
These are designed to work on all platforms. Try these, and if they
213
don't work on your platform, then special case your platform with new
215
---------------------------------------------------------------------------*/
217
/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
218
U_CAPI UDate U_EXPORT2
225
uprv_memset( &tmrec, 0, sizeof(tmrec) );
229
t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
232
uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
233
t2 = mktime(&tmrec); /* seconds of current GMT*/
234
return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
235
#elif defined(U_WINDOWS)
237
FileTimeConversion winTime;
238
GetSystemTimeAsFileTime(&winTime.fileTime);
239
return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
242
struct timeval posixTime;
243
gettimeofday(&posixTime, NULL);
244
return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
248
return (UDate)epochtime * U_MILLIS_PER_SECOND;
252
/*-----------------------------------------------------------------------------
254
These methods detect and return NaN and infinity values for doubles
255
conforming to IEEE 754. Platforms which support this standard include X86,
256
Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
257
If this doesn't work on your platform, you have non-IEEE floating-point, and
258
will need to code your own versions. A naive implementation is to return 0.0
259
for getNaN and getInfinity, and false for isNaN and isInfinite.
260
---------------------------------------------------------------------------*/
262
U_CAPI UBool U_EXPORT2
263
uprv_isNaN(double number)
266
BitPatternConversion convertedNumber;
267
convertedNumber.d64 = number;
268
/* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
269
return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
272
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
274
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
277
return ((highBits & 0x7F080000L) == 0x7F080000L) &&
278
(lowBits == 0x00000000L);
281
/* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
282
/* you'll need to replace this default implementation with what's correct*/
283
/* for your platform.*/
284
return number != number;
288
U_CAPI UBool U_EXPORT2
289
uprv_isInfinite(double number)
292
BitPatternConversion convertedNumber;
293
convertedNumber.d64 = number;
294
/* Infinity is exactly 0x7FF0000000000000U. */
295
return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
297
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
299
uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
302
return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
305
/* If your platform doesn't support IEEE 754 but *does* have an infinity*/
306
/* value, you'll need to replace this default implementation with what's*/
307
/* correct for your platform.*/
308
return number == (2.0 * number);
312
U_CAPI UBool U_EXPORT2
313
uprv_isPositiveInfinity(double number)
315
#if IEEE_754 || defined(OS390)
316
return (UBool)(number > 0 && uprv_isInfinite(number));
318
return uprv_isInfinite(number);
322
U_CAPI UBool U_EXPORT2
323
uprv_isNegativeInfinity(double number)
325
#if IEEE_754 || defined(OS390)
326
return (UBool)(number < 0 && uprv_isInfinite(number));
329
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
331
return((highBits & SIGN) && uprv_isInfinite(number));
336
U_CAPI double U_EXPORT2
339
#if IEEE_754 || defined(OS390)
342
/* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
343
/* you'll need to replace this default implementation with what's correct*/
344
/* for your platform.*/
349
U_CAPI double U_EXPORT2
352
#if IEEE_754 || defined(OS390)
355
/* If your platform doesn't support IEEE 754 but *does* have an infinity*/
356
/* value, you'll need to replace this default implementation with what's*/
357
/* correct for your platform.*/
362
U_CAPI double U_EXPORT2
367
U_CAPI double U_EXPORT2
373
U_CAPI double U_EXPORT2
376
return uprv_floor(x + 0.5);
379
U_CAPI double U_EXPORT2
385
U_CAPI double U_EXPORT2
386
uprv_modf(double x, double* y)
391
U_CAPI double U_EXPORT2
392
uprv_fmod(double x, double y)
397
U_CAPI double U_EXPORT2
398
uprv_pow(double x, double y)
400
/* This is declared as "double pow(double x, double y)" */
404
U_CAPI double U_EXPORT2
405
uprv_pow10(int32_t x)
407
return pow(10.0, (double)x);
410
U_CAPI double U_EXPORT2
411
uprv_fmax(double x, double y)
416
/* first handle NaN*/
417
if(uprv_isNaN(x) || uprv_isNaN(y))
418
return uprv_getNaN();
420
/* check for -0 and 0*/
421
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
422
if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
427
/* this should work for all flt point w/o NaN and Inf special cases */
428
return (x > y ? x : y);
431
U_CAPI double U_EXPORT2
432
uprv_fmin(double x, double y)
437
/* first handle NaN*/
438
if(uprv_isNaN(x) || uprv_isNaN(y))
439
return uprv_getNaN();
441
/* check for -0 and 0*/
442
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
443
if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
448
/* this should work for all flt point w/o NaN and Inf special cases */
449
return (x > y ? y : x);
453
* Truncates the given double.
454
* trunc(3.3) = 3.0, trunc (-3.3) = -3.0
455
* This is different than calling floor() or ceil():
456
* floor(3.3) = 3, floor(-3.3) = -4
457
* ceil(3.3) = 4, ceil(-3.3) = -3
459
U_CAPI double U_EXPORT2
465
/* handle error cases*/
467
return uprv_getNaN();
468
if(uprv_isInfinite(d))
469
return uprv_getInfinity();
471
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
472
if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
478
return d >= 0 ? floor(d) : ceil(d);
484
* Return the largest positive number that can be represented by an integer
485
* type of arbitrary bit length.
487
U_CAPI double U_EXPORT2
488
uprv_maxMantissa(void)
490
return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
493
U_CAPI double U_EXPORT2
499
U_CAPI void * U_EXPORT2
500
uprv_maximumPtr(void * base)
504
* With the provided function we should never be out of range of a given segment
505
* (a traditional/typical segment that is). Our segments have 5 bytes for the
506
* id and 3 bytes for the offset. The key is that the casting takes care of
507
* only retrieving the offset portion minus x1000. Hence, the smallest offset
508
* seen in a program is x001000 and when casted to an int would be 0.
509
* That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
511
* Currently, 16MB is the current addressing limitation on i5/OS if the activation is
512
* non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
513
* This function determines the activation based on the pointer that is passed in and
514
* calculates the appropriate maximum available size for
515
* each pointer type (TERASPACE and non-TERASPACE)
517
* Unlike other operating systems, the pointer model isn't determined at
518
* compile time on i5/OS.
520
if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
521
/* if it is a TERASPACE pointer the max is 2GB - 4k */
522
return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
524
/* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
525
return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
528
return U_MAX_PTR(base);
532
/*---------------------------------------------------------------------------
533
Platform-specific Implementations
534
Try these, and if they don't work on your platform, then special case your
535
platform with new implementations.
536
---------------------------------------------------------------------------*/
538
/* Generic time zone layer -------------------------------------------------- */
540
/* Time zone utilities */
541
U_CAPI void U_EXPORT2
547
/* no initialization*/
551
U_CAPI int32_t U_EXPORT2
563
uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
564
dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
565
t1 = mktime(&tmrec); /* local time in seconds*/
566
uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
567
t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
569
/* imitate NT behaviour, which returns same timezone offset to GMT for
577
/* Note that U_TZNAME does *not* have to be tzname, but if it is,
578
some platforms need to have it declared here. */
580
#if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
581
/* RS6000 and others reject char **tzname. */
582
extern U_IMPORT char *U_TZNAME[];
585
#if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
586
/* These platforms are likely to use Olson timezone IDs. */
587
#define CHECK_LOCALTIME_LINK 1
589
#define TZDEFAULT "/etc/localtime"
590
#define TZZONEINFO "/usr/share/zoneinfo/"
593
#define TZZONEINFO (TZDIR "/")
595
static char gTimeZoneBuffer[PATH_MAX];
596
static char *gTimeZoneBufferPtr = NULL;
600
/* Nonzero when ch is not an ASCII decimal digit. The argument is parenthesized
   so that expression arguments (e.g. a ternary) are compared as a whole; note
   that ch is still evaluated twice, so avoid arguments with side effects. */
#define isNonDigit(ch) ((ch) < '0' || '9' < (ch))
601
static UBool isValidOlsonID(const char *id) {
604
/* Determine if this is something like Iceland (Olson ID)
605
or AST4ADT (non-Olson ID) */
606
while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
610
/* If we went through the whole string, then it might be okay.
611
The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
612
"GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
613
The rest of the time it could be an Olson ID. George */
614
return (UBool)(id[idx] == 0
615
|| uprv_strcmp(id, "PST8PDT") == 0
616
|| uprv_strcmp(id, "MST7MDT") == 0
617
|| uprv_strcmp(id, "CST6CDT") == 0
618
|| uprv_strcmp(id, "EST5EDT") == 0);
622
#if defined(U_TZNAME) && !defined(U_WINDOWS)
624
/* Convert an offset in hours to seconds as an int32_t. Both the argument and
   the whole expansion are parenthesized so that expression arguments and
   surrounding operators cannot change the result. */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset)*3600))
625
typedef struct OffsetZoneMapping {
626
int32_t offsetSeconds;
627
int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
634
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
635
and maps it to an Olson ID.
636
Before adding anything to this list, take a look at
637
icu/source/tools/tzcode/tz.alias
638
Sometimes no daylight savings (0) is important to define due to aliases.
639
This list can be tested with icu/source/test/compat/tzone.pl
640
More values could be added to daylightType to increase precision.
642
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
643
{-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
644
{-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
645
{-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
646
{-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
647
{-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
648
{-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
649
{-36000, 2, "EST", "EST", "Australia/Sydney"},
650
{-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
651
{-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
652
{-34200, 2, "CST", "CST", "Australia/South"},
653
{-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
654
{-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
655
{-31500, 2, "CWST", "CWST", "Australia/Eucla"},
656
{-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
657
{-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
658
{-28800, 2, "WST", "WST", "Australia/West"},
659
{-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
660
{-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
661
{-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
662
{-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
663
{-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
664
{-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
665
{-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
666
{-14400, 1, "AZT", "AZST", "Asia/Baku"},
667
{-10800, 1, "AST", "ADT", "Asia/Baghdad"},
668
{-10800, 1, "MSK", "MSD", "Europe/Moscow"},
669
{-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
670
{-7200, 0, "EET", "CEST", "Africa/Tripoli"},
671
{-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
672
{-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
673
{-3600, 0, "CET", "WEST", "Africa/Algiers"},
674
{-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
675
{0, 1, "GMT", "IST", "Europe/Dublin"},
676
{0, 1, "GMT", "BST", "Europe/London"},
677
{0, 0, "WET", "WEST", "Africa/Casablanca"},
678
{0, 0, "WET", "WET", "Africa/El_Aaiun"},
679
{3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
680
{3600, 1, "EGT", "EGST", "America/Scoresbysund"},
681
{10800, 1, "PMST", "PMDT", "America/Miquelon"},
682
{10800, 2, "UYT", "UYST", "America/Montevideo"},
683
{10800, 1, "WGT", "WGST", "America/Godthab"},
684
{10800, 2, "BRT", "BRST", "Brazil/East"},
685
{12600, 1, "NST", "NDT", "America/St_Johns"},
686
{14400, 1, "AST", "ADT", "Canada/Atlantic"},
687
{14400, 2, "AMT", "AMST", "America/Cuiaba"},
688
{14400, 2, "CLT", "CLST", "Chile/Continental"},
689
{14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
690
{14400, 2, "PYT", "PYST", "America/Asuncion"},
691
{18000, 1, "CST", "CDT", "America/Havana"},
692
{18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
693
{21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
694
{21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
695
{21600, 0, "CST", "CDT", "America/Guatemala"},
696
{21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
697
{25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
698
{28800, 0, "PST", "PST", "Pacific/Pitcairn"},
699
{28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
700
{32400, 1, "AKST", "AKDT", "US/Alaska"},
701
{36000, 1, "HAST", "HADT", "US/Aleutian"}
704
/*#define DEBUG_TZNAME*/
706
static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
710
fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
712
for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
714
if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
715
&& daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
716
&& strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
717
&& strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
719
return OFFSET_ZONE_MAPPINGS[idx].olsonID;
726
U_CAPI const char* U_EXPORT2
729
const char *tzid = NULL;
731
tzid = uprv_detectWindowsTimeZone();
738
/*#if defined(U_DARWIN)
741
tzid = getenv("TZFILE");
747
/* This code can be temporarily disabled to test tzname resolution later on. */
750
if (tzid != NULL && isValidOlsonID(tzid))
752
/* This might be a good Olson ID. */
753
if (uprv_strncmp(tzid, "posix/", 6) == 0
754
|| uprv_strncmp(tzid, "right/", 6) == 0)
756
/* Remove the posix/ or right/ prefix. */
761
/* else U_TZNAME will give a better result. */
764
#if defined(CHECK_LOCALTIME_LINK)
765
/* Caller must handle threading issues */
766
if (gTimeZoneBufferPtr == NULL) {
768
This is a trick to look at the name of the link to get the Olson ID
769
because the tzfile contents are underspecified.
770
This isn't guaranteed to work because it may not be a symlink.
772
/* readlink() does not NUL-terminate and may fill the entire buffer it is
   given, so leave one byte free for the terminator that is stored at
   gTimeZoneBuffer[ret] afterwards. */
int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer) - 1);
774
int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
775
gTimeZoneBuffer[ret] = 0;
776
if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
777
&& isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
779
return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
784
return gTimeZoneBufferPtr;
790
#if !defined(U_WINDOWS)
792
U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
793
So we remap the abbreviation to an olson ID.
795
Since Windows exposes a little more timezone information,
796
we normally don't use this code on Windows because
797
uprv_detectWindowsTimeZone should have already given the correct answer.
800
struct tm juneSol, decemberSol;
802
static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
803
static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
805
/* This probing will tell us when daylight savings occurs. */
806
localtime_r(&juneSolstice, &juneSol);
807
localtime_r(&decemberSolstice, &decemberSol);
808
daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
809
tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
821
/* Get and set the ICU data directory --------------------------------------- */
823
static char *gDataDirectory = NULL;
825
static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
828
static UBool U_CALLCONV putil_cleanup(void)
830
if (gDataDirectory && *gDataDirectory) {
831
uprv_free(gDataDirectory);
833
gDataDirectory = NULL;
835
if (gCorrectedPOSIXLocale) {
836
uprv_free(gCorrectedPOSIXLocale);
837
gCorrectedPOSIXLocale = NULL;
844
* Set the data directory.
845
* Make a copy of the passed string, and set the global data dir to point to it.
846
* TODO: see bug #2849, regarding thread safety.
848
U_CAPI void U_EXPORT2
849
u_setDataDirectory(const char *directory) {
853
if(directory==NULL || *directory==0) {
854
/* A small optimization to prevent the malloc and copy when the
855
shared library is used, and this is a way to make sure that NULL
858
newDataDir = (char *)"";
861
length=(int32_t)uprv_strlen(directory);
862
newDataDir = (char *)uprv_malloc(length + 2);
863
uprv_strcpy(newDataDir, directory);
865
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
868
while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
869
*p = U_FILE_SEP_CHAR;
876
if (gDataDirectory && *gDataDirectory) {
877
uprv_free(gDataDirectory);
879
gDataDirectory = newDataDir;
880
ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
884
U_CAPI UBool U_EXPORT2
885
uprv_pathIsAbsolute(const char *path)
887
if(!path || !*path) {
891
if(*path == U_FILE_SEP_CHAR) {
895
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
896
if(*path == U_FILE_ALT_SEP_CHAR) {
901
#if defined(U_WINDOWS)
902
if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
903
((path[0] >= 'a') && (path[0] <= 'z'))) &&
912
U_CAPI const char * U_EXPORT2
913
u_getDataDirectory(void) {
914
const char *path = NULL;
916
/* if we have the directory, then return it immediately */
917
UMTX_CHECK(NULL, gDataDirectory, path);
924
When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
925
override ICU's data with the ICU_DATA environment variable. This prevents
926
problems where multiple custom copies of ICU's specific version of data
927
are installed on a system. Either the application must define the data
928
directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
929
ICU, set the data with udata_setCommonData or trust that all of the
930
required data is contained in ICU's data library that contains
931
the entry point defined by U_ICUDATA_ENTRY_POINT.
933
There may also be some platforms where environment variables
936
# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
937
/* First try to get the environment variable */
938
path=getenv("ICU_DATA");
941
/* ICU_DATA_DIR may be set as a compile option */
943
if(path==NULL || *path==0) {
949
/* It looks really bad, set it to something. */
953
u_setDataDirectory(path);
954
return gDataDirectory;
961
/* Macintosh-specific locale information ------------------------------------ */
972
/* Todo: This will be updated with a newer version from www.unicode.org web
973
page when it's available.*/
974
#define MAC_LC_MAGIC_NUMBER (-5)
975
#define MAC_LC_INIT_NUMBER (-9)
977
static const mac_lc_rec mac_lc_recs[] = {
978
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
980
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
982
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
984
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
986
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
988
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
990
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
991
/* French for Belgium or Luxembourg*/
992
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
994
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
996
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
998
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
1000
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
1002
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
1004
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
1006
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
1007
/* the Arabic world (?)*/
1008
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
1010
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
1011
/* French for Switzerland*/
1012
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
1013
/* German for Switzerland*/
1014
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
1016
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
1018
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
1020
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
1022
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
1024
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
1025
/* Croatian system for Yugoslavia*/
1026
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
1027
/* Hindi system for India*/
1028
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
1030
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
1032
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
1034
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
1036
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
1038
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
1040
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
1041
/* Lapland [Ask Rich for the data. HS]*/
1042
/*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
1044
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
1046
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
1048
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
1050
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
1052
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
1053
/* People's Republic of China*/
1054
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
1056
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
1059
/* fallback is en_US*/
1060
MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
1061
MAC_LC_MAGIC_NUMBER, "en_US"
1067
/* Return just the POSIX id, whatever happens to be in it */
1068
static const char *uprv_getPOSIXID(void)
1070
static const char* posixID = NULL;
1073
* On Solaris two different calls to setlocale can result in
1074
* different values. Only get this value once.
1076
* We must check this first because an application can set this.
1078
* LC_ALL can't be used because it's platform dependent. The LANG
1079
* environment variable seems to affect LC_CTYPE variable by default.
1080
* Here is what setlocale(LC_ALL, NULL) can return.
1081
* HPUX can return 'C C C C C C C'
1082
* Solaris can return /en_US/C/C/C/C/C on the second try.
1083
* Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1085
* The default codepage detection also needs to use LC_CTYPE.
1087
* Do not call setlocale(LC_*, "")! Using an empty string instead
1088
* of NULL, will modify the libc behavior.
1090
posixID = setlocale(LC_CTYPE, NULL);
1092
|| (uprv_strcmp("C", posixID) == 0)
1093
|| (uprv_strcmp("POSIX", posixID) == 0))
1095
/* Maybe we got some garbage. Try something more reasonable */
1096
posixID = getenv("LC_ALL");
1098
posixID = getenv("LC_CTYPE");
1100
posixID = getenv("LANG");
1106
|| (uprv_strcmp("C", posixID) == 0)
1107
|| (uprv_strcmp("POSIX", posixID) == 0))
1109
/* Nothing worked. Give it a nice POSIX default value. */
1110
posixID = "en_US_POSIX";
1118
/* NOTE: The caller should handle thread safety */
1119
U_CAPI const char* U_EXPORT2
1120
uprv_getDefaultLocaleID()
1124
Note that: (a '!' means the ID is improper somehow)
1125
LC_ALL ----> default_loc codepage
1126
--------------------------------------------------------
1131
ab_CD.EF@GH ab_CD_GH EF
1133
Some 'improper' ways to do the same as above:
1134
! ab_CD@GH.EF ab_CD_GH EF
1135
! ab_CD.EF@GH.IJ ab_CD_GH EF
1136
! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1141
The variant cannot have dots in it.
1142
The 'rightmost' variant (@xxx) wins.
1143
The leftmost codepage (.xxx) wins.
1145
char *correctedPOSIXLocale = 0;
1146
const char* posixID = uprv_getPOSIXID();
1151
/* Format: (no spaces)
1152
ll [ _CC ] [ . MM ] [ @ VV]
1154
l = lang, C = ctry, M = charmap, V = variant
1157
if (gCorrectedPOSIXLocale != NULL) {
1158
return gCorrectedPOSIXLocale;
1161
if ((p = uprv_strchr(posixID, '.')) != NULL) {
1162
/* assume new locale can't be larger than old one? */
1163
correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1164
uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1165
correctedPOSIXLocale[p-posixID] = 0;
1167
/* do not copy after the @ */
1168
if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1169
correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1173
/* Note that we scan the *uncorrected* ID. */
1174
if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1175
if (correctedPOSIXLocale == NULL) {
1176
correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1177
uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1178
correctedPOSIXLocale[p-posixID] = 0;
1182
/* Take care of any special cases here.. */
1183
if (!uprv_strcmp(p, "nynorsk")) {
1185
/* Don't worry about no__NY. In practice, it won't appear. */
1188
if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1189
uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1192
uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1195
if ((q = uprv_strchr(p, '.')) != NULL) {
1196
/* How big will the resulting string be? */
1197
len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1198
uprv_strncat(correctedPOSIXLocale, p, q-p);
1199
correctedPOSIXLocale[len] = 0;
1202
/* Anything following the @ sign */
1203
uprv_strcat(correctedPOSIXLocale, p);
1206
/* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1207
* How about 'russian' -> 'ru'?
1208
* Many of the other locales using ISO codes will be handled by the
1209
* canonicalization functions in uloc_getDefault.
1213
/* Was a correction made? */
1214
if (correctedPOSIXLocale != NULL) {
1215
posixID = correctedPOSIXLocale;
1218
/* copy it, just in case the original pointer goes away. See j2395 */
1219
correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1220
posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1223
if (gCorrectedPOSIXLocale == NULL) {
1224
gCorrectedPOSIXLocale = correctedPOSIXLocale;
1225
ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1226
correctedPOSIXLocale = NULL;
1229
if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1230
uprv_free(correctedPOSIXLocale);
1235
#elif defined(U_WINDOWS)
1236
UErrorCode status = U_ZERO_ERROR;
1237
LCID id = GetThreadLocale();
1238
const char* locID = uprv_convertToPosix(id, &status);
1240
if (U_FAILURE(status)) {
1245
#elif defined(XP_MAC)
1246
int32_t script = MAC_LC_INIT_NUMBER;
1247
/* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1248
int32_t region = MAC_LC_INIT_NUMBER;
1249
/* = GetScriptManagerVariable(smRegionCode);*/
1250
int32_t lang = MAC_LC_INIT_NUMBER;
1251
/* = GetScriptManagerVariable(smScriptLang);*/
1252
int32_t date_region = MAC_LC_INIT_NUMBER;
1253
const char* posixID = 0;
1254
int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1258
ih = (Intl1Hndl) GetIntlResource(1);
1260
date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1262
for (i = 0; i < count; i++) {
1263
if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1264
|| (mac_lc_recs[i].script == script))
1265
&& ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1266
|| (mac_lc_recs[i].region == region))
1267
&& ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1268
|| (mac_lc_recs[i].lang == lang))
1269
&& ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1270
|| (mac_lc_recs[i].date_region == date_region))
1273
posixID = mac_lc_recs[i].posixID;
1280
#elif defined(OS400)
1281
/* locales are process scoped and are by definition thread safe */
1282
static char correctedLocale[64];
1283
const char *localeID = getenv("LC_ALL");
1286
if (localeID == NULL)
1287
localeID = getenv("LANG");
1288
if (localeID == NULL)
1289
localeID = setlocale(LC_ALL, NULL);
1290
/* Make sure we have something... */
1291
if (localeID == NULL)
1292
return "en_US_POSIX";
1294
/* Extract the locale name from the path. */
1295
if((p = uprv_strrchr(localeID, '/')) != NULL)
1297
/* Increment p to start of locale name. */
1302
/* Copy to work location. */
1303
uprv_strcpy(correctedLocale, localeID);
1305
/* Strip off the '.locale' extension. */
1306
if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1310
/* Upper case the locale name. */
1311
T_CString_toUpperCase(correctedLocale);
1313
/* See if we are using the POSIX locale. Any of the
1314
* following are equivalent and use the same QLGPGCMA
1316
* QLGPGCMA2 means UCS2
1317
* QLGPGCMA_4 means UTF-32
1318
* QLGPGCMA_8 means UTF-8
1320
if ((uprv_strcmp("C", correctedLocale) == 0) ||
1321
(uprv_strcmp("POSIX", correctedLocale) == 0) ||
1322
(uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1324
uprv_strcpy(correctedLocale, "en_US_POSIX");
1330
/* Lower case the lang portion. */
1331
for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1333
*p = uprv_tolower(*p);
1336
/* Adjust for Euro. After '_E' add 'URO'. */
1337
LocaleLen = uprv_strlen(correctedLocale);
1338
if (correctedLocale[LocaleLen - 2] == '_' &&
1339
correctedLocale[LocaleLen - 1] == 'E')
1341
uprv_strcat(correctedLocale, "URO");
1344
/* If using Lotus-based locale then convert to
1345
* equivalent non Lotus.
1347
else if (correctedLocale[LocaleLen - 2] == '_' &&
1348
correctedLocale[LocaleLen - 1] == 'L')
1350
correctedLocale[LocaleLen - 2] = 0;
1353
/* There are separate simplified and traditional
1354
* locales called zh_HK_S and zh_HK_T.
1356
else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1358
uprv_strcpy(correctedLocale, "zh_HK");
1361
/* A special zh_CN_GBK locale...
1363
else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1365
uprv_strcpy(correctedLocale, "zh_CN");
1370
return correctedLocale;
1377
#if U_POSIX_LOCALE
/*
Due to various platform differences, one platform may specify a charset,
when they really mean a different charset. Remap the names so that they are
compatible with ICU. Only conflicting/ambiguous aliases should be resolved
here. Before adding anything to this function, please consider adding unique
names to the ICU alias table in the data directory.

locale - locale part of the POSIX ID, or NULL; "" is treated like NULL.
name   - charset name reported by the platform; may be NULL.
Returns the name to use (either the input pointer or a string literal),
or NULL when name is NULL or "".

NOTE(review): the replacement names assigned in the branches below follow
upstream ICU; confirm them against the project's converter alias table.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    if (locale != NULL && *locale == 0) {
        /* Make sure that an empty locale is handled the same way. */
        locale = NULL;
    }
    if (name == NULL) {
        return NULL;
    }
#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        }
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        }
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && *name == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        name = "UTF-8";
    }
#elif defined(U_HPUX)
    if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "korean") == 0) {
            name = "EUC-KR";
        }
        else if (uprv_strcmp(locale, "japanese") == 0) {
            /* See comment below about eucJP */
            name = "eucjis";
        }
    }
    else if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#endif
    /* return NULL when "" is passed in */
    if (*name == 0) {
        name = NULL;
    }
    return name;
}
#endif /* U_POSIX_LOCALE */
1467
#if U_POSIX_LOCALE
/*
 * Extract the charset name from a POSIX locale ID of the form
 * "locale.codeset[@variant]".
 *
 * localeName   - POSIX locale ID; may be NULL.
 * buffer       - receives the codeset text that follows the first '.',
 *                truncated to fit and cut at the first '@'.
 * buffCapacity - size of buffer in bytes.
 *
 * Returns the result of remapPlatformDependentCodepage() for that codeset
 * (a pointer into buffer or a remapped name), or NULL when localeName is
 * NULL, contains no '.', or no usable buffer was supplied.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *name = NULL;
    char *variant = NULL;

    if (buffer == NULL || buffCapacity <= 0) {
        /* Nothing can be stored; buffer[buffCapacity-1] would be out of bounds. */
        return NULL;
    }

    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
        /* Keep the part before the '.' as the locale, truncated to localeBuf. */
        size_t localeCapacity = uprv_min(sizeof(localeBuf), (size_t)(name-localeName)+1);
        uprv_strncpy(localeBuf, localeName, localeCapacity);
        localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */

        /* Everything after the '.' is the candidate codeset name. */
        uprv_strncpy(buffer, name+1, buffCapacity);
        buffer[buffCapacity-1] = 0; /* ensure NULL termination */
        if ((variant = (uprv_strchr(buffer, '@'))) != NULL) {
            *variant = 0; /* drop a trailing "@variant" */
        }
        name = remapPlatformDependentCodepage(localeBuf, buffer);
    }
    return name;
}
#endif /* U_POSIX_LOCALE */
1489
/*
 * Work out the name of the platform's default charset.
 *
 *   OS400          - "ibm-<ccsid>" from the job's CCSID (QUSRJOBI), with
 *                    ibm-37 as the default.
 *   OS390          - nl_langinfo(CODESET) followed by
 *                    UCNV_SWAP_LFNL_OPTION_STRING.
 *   XP_MAC         - the fixed name "macintosh".
 *   U_WINDOWS      - "windows-<ANSI code page>".
 *   U_POSIX_LOCALE - the codeset embedded in the POSIX locale ID, else
 *                    nl_langinfo() when available, else "US-ASCII".
 *   otherwise      - "US-ASCII".
 *
 * The result is a string literal or a pointer into a function-local static
 * buffer; it must not be freed. Not synchronized - see
 * uprv_getDefaultCodepage().
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    /*
     * NOTE(review): the two blank-padded arguments were not visible in the
     * reviewed text. They follow the QUSRJOBI parameter list: qualified job
     * name CHAR(26) with '*' meaning the current job, and internal job
     * identifier CHAR(16) left blank. Confirm the padding.
     */
    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    sprintf(codepage,"ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /*
     * Bound the copy so that the codeset name plus the option suffix always
     * fit. A "%63s" conversion only sets a minimum field width and does not
     * limit how much sprintf writes.
     */
    const size_t maxNameLen = sizeof(codepage) - 1 - uprv_strlen(UCNV_SWAP_LFNL_OPTION_STRING);

    uprv_strncpy(codepage, nl_langinfo(CODESET), maxNameLen);
    codepage[maxNameLen] = 0; /* NULL terminate */
    uprv_strcat(codepage, UCNV_SWAP_LFNL_OPTION_STRING);
    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    sprintf(codepage, "windows-%d", GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    localeName = uprv_getPOSIXID();
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }
    /* else "C" was probably returned. That's underspecified. */

#if U_HAVE_NL_LANGINFO_CODESET
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    /* When available, check nl_langinfo because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        codeset = remapPlatformDependentCodepage(NULL, codeset);
        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1574
U_CAPI const char* U_EXPORT2
1575
uprv_getDefaultCodepage()
1577
static char const *name = NULL;
1580
name = int_getDefaultCodepage();
1587
/* end of platform-specific implementation -------------- */
1589
/* version handling --------------------------------------------------------- */
1591
U_CAPI void U_EXPORT2
1592
u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1596
if(versionArray==NULL) {
1600
if(versionString!=NULL) {
1602
versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1603
if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1606
versionString=end+1;
1610
while(part<U_MAX_VERSION_LENGTH) {
1611
versionArray[part++]=0;
1615
U_CAPI void U_EXPORT2
1616
u_versionToString(UVersionInfo versionArray, char *versionString) {
1617
uint16_t count, part;
1620
if(versionString==NULL) {
1624
if(versionArray==NULL) {
1629
/* count how many fields need to be written */
1630
for(count=4; count>0 && versionArray[count-1]==0; --count) {
1637
/* write the first part */
1638
/* write the decimal field value */
1639
field=versionArray[0];
1641
*versionString++=(char)('0'+field/100);
1645
*versionString++=(char)('0'+field/10);
1648
*versionString++=(char)('0'+field);
1650
/* write the following parts */
1651
for(part=1; part<count; ++part) {
1652
/* write a dot first */
1653
*versionString++=U_VERSION_DELIMITER;
1655
/* write the decimal field value */
1656
field=versionArray[part];
1658
*versionString++=(char)('0'+field/100);
1662
*versionString++=(char)('0'+field/10);
1665
*versionString++=(char)('0'+field);
1672
U_CAPI void U_EXPORT2
1673
u_getVersion(UVersionInfo versionArray) {
1674
u_versionFromString(versionArray, U_ICU_VERSION);
1678
* Hey, Emacs, please set the following:
1681
* indent-tabs-mode: nil