2
******************************************************************************
4
* Copyright (C) 1999-2001, International Business Machines
5
* Corporation and others. All Rights Reserved.
7
******************************************************************************
10
* tab size: 8 (not used)
13
* created on: 1999oct25
14
* created by: Markus W. Scherer
17
#include "unicode/utypes.h"
18
#include "unicode/putil.h"
22
#include "unicode/udata.h"
23
#include "unicode/uversion.h"
31
/***********************************************************************
33
* Notes on the organization of the ICU data implementation
35
* All of the public API is defined in udata.h
37
* The implementation is split into several files...
39
* - udata.c (this file) contains higher level code that knows about
40
* the search paths for locating data, caching opened data, etc.
42
* - umapfile.c contains the low level platform-specific code for actually loading
43
* (memory mapping, file reading, whatever) data into memory.
45
* - ucmndata.c deals with the tables of contents of ICU data items within
46
* an ICU common format data file. The implementation includes
47
* an abstract interface and support for multiple TOC formats.
48
* All knowledge of any specific TOC format is encapsulated here.
50
* - udatamem.c has code for managing UDataMemory structs. These are little
51
* descriptor objects for blocks of memory holding ICU data of
55
/* configuration ---------------------------------------------------------- */
57
/* If you are excruciatingly bored turn this on .. */
58
/* #define UDATA_DEBUG 1 */
60
#if defined(UDATA_DEBUG)
67
/***********************************************************************
69
* static (Global) data
71
************************************************************************/
72
static UDataMemory *gCommonICUData = NULL; /* Pointer to the common ICU data. */
73
/* May be updated once, if we started with */
74
/* a stub or subset library. */
76
static UDataMemory *gStubICUData = NULL; /* If gCommonICUData does get updated, remember */
77
/* the original one so that it can be cleaned */
78
/* up when ICU is shut down. */
80
static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */
86
if (gCommonDataCache) { /* Delete the cache of user data mappings. */
87
uhash_close(gCommonDataCache); /* Table owns the contents, and will delete them. */
88
gCommonDataCache = NULL; /* Cleanup is not thread safe. */
91
if (gCommonICUData != NULL) {
92
udata_close(gCommonICUData); /* Clean up common ICU Data */
93
gCommonICUData = NULL;
96
if (gStubICUData != NULL) {
97
udata_close(gStubICUData); /* Clean up the stub ICU Data */
102
return TRUE; /* Everything was cleaned up */
109
* setCommonICUData. Set a UDataMemory to be the global ICU Data
112
setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to caller, we copy it. */
113
UDataMemory *oldData, /* Old ICUData ptr. Overwrite of this value is ok, */
114
/* of any others is not. */
115
UBool warn, /* If true, set USING_DEFAULT warning if ICUData was */
116
/* changed by another thread before we got to it. */
119
UDataMemory *newCommonData = UDataMemory_createNewInstance(pErr);
120
if (U_FAILURE(*pErr)) {
124
/* For the assignment, other threads must cleanly see either the old */
125
/* or the new, not some partially initialized new. The old can not be */
126
/* deleted - someone may still have a pointer to it lying around in */
128
UDatamemory_assign(newCommonData, pData);
130
if (gCommonICUData==oldData) {
131
gStubICUData = gCommonICUData; /* remember the old Common Data, so it can be cleaned up. */
132
gCommonICUData = newCommonData;
136
*pErr = U_USING_DEFAULT_WARNING;
138
uprv_free(newCommonData);
147
strcpy_returnEnd(char *dest, const char *src) {
148
while((*dest=*src)!=0) {
155
/*------------------------------------------------------------------------------
157
* computeDirPath given a user-supplied path of an item to be opened,
159
* - the full directory path to be used
160
* when opening the file.
161
* - Pointer to null at end of above returned path
164
* path: input path. Buffer is not altered.
165
* pathBuffer: Output buffer. Any contents are overwritten.
168
* Pointer to null termination in returned pathBuffer.
170
* TODO: This works the way ICU historically has, but the
171
* whole data fallback search path is so complicated that
172
* probably almost no one will ever really understand it,
173
* the potential for confusion is large. (It's not just
174
* this one function, but the whole scheme.)
176
*------------------------------------------------------------------------------*/
178
uprv_computeDirPath(const char *path, char *pathBuffer) {
179
char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */
180
int pathLen; /* Length of the returned directory path */
184
finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR);
188
if (finalSlash == 0) {
189
/* No user-supplied path.
190
* Copy the ICU_DATA path to the path buffer and return that*/
191
const char *icuDataDir;
192
icuDataDir=u_getDataDirectory();
193
if(icuDataDir!=NULL && *icuDataDir!=0) {
194
return strcpy_returnEnd(pathBuffer, icuDataDir);
196
/* there is no icuDataDir either. Just return the empty pathBuffer. */
201
/* User supplied path did contain a directory portion.
202
* Copy it to the output path buffer */
203
pathLen = finalSlash - path + 1;
204
uprv_memcpy(pathBuffer, path, pathLen);
205
*(pathBuffer+pathLen) = 0;
206
return pathBuffer+pathLen;
211
findBasename(const char *path) {
212
const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
221
/*----------------------------------------------------------------------*
223
* Cache for common data *
224
* Functions for looking up or adding entries to a cache of *
225
* data that has been previously opened. Avoids a potentially *
226
* expensive operation of re-opening the data for subsequent *
229
* Data remains cached for the duration of the process. *
231
*----------------------------------------------------------------------*/
233
typedef struct DataCacheElement {
241
* Deleter function for DataCacheElements.
242
* udata cleanup function closes the hash table; hash table in turn calls back to
243
* here for each entry.
245
static void U_EXPORT2 U_CALLCONV DataCacheElement_deleter(void *pDCEl) {
246
DataCacheElement *p = (DataCacheElement *)pDCEl;
247
udata_close(p->item); /* unmaps storage */
248
uprv_free(p->name); /* delete the hash key string. */
249
uprv_free(pDCEl); /* delete 'this' */
252
/* udata_getHashTable()
253
* Get the hash table used to store the data cache entries.
254
* Lazy create it if it doesn't yet exist.
256
static UHashtable *udata_getHashTable() {
257
UErrorCode err = U_ZERO_ERROR;
259
if (gCommonDataCache != NULL) {
260
return gCommonDataCache;
263
if (gCommonDataCache == NULL) {
264
gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, &err);
265
uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter);
269
if (U_FAILURE(err)) {
270
return NULL; /* TODO: handle this error better. */
272
return gCommonDataCache;
277
static UDataMemory *udata_findCachedData(const char *path)
280
UDataMemory *retVal = NULL;
281
DataCacheElement *el;
282
const char *baseName;
284
baseName = findBasename(path); /* Cache remembers only the base name, not the full path. */
285
htable = udata_getHashTable();
287
el = (DataCacheElement *)uhash_get(htable, baseName);
296
static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) {
297
DataCacheElement *newElement;
298
const char *baseName;
301
UDataMemory *oldValue = NULL;
303
if (U_FAILURE(*pErr)) {
307
/* Create a new DataCacheElement - the thingy we store in the hash table -
308
* and copy the supplied path and UDataMemoryItems into it.
310
newElement = uprv_malloc(sizeof(DataCacheElement));
311
if (newElement == NULL) {
312
*pErr = U_MEMORY_ALLOCATION_ERROR;
315
newElement->item = UDataMemory_createNewInstance(pErr);
316
if (U_FAILURE(*pErr)) {
319
UDatamemory_assign(newElement->item, item);
321
baseName = findBasename(path);
322
nameLen = uprv_strlen(baseName);
323
newElement->name = uprv_malloc(nameLen+1);
324
if (newElement->name == NULL) {
325
*pErr = U_MEMORY_ALLOCATION_ERROR;
328
uprv_strcpy(newElement->name, baseName);
330
/* Stick the new DataCacheElement into the hash table.
332
htable = udata_getHashTable();
334
oldValue = uhash_get(htable, path);
335
if (oldValue != NULL) {
336
*pErr = U_USING_DEFAULT_WARNING; }
340
newElement->name, /* Key */
341
newElement, /* Value */
346
if (*pErr == U_USING_DEFAULT_WARNING || U_FAILURE(*pErr)) {
347
uprv_free(newElement->name);
348
uprv_free(newElement->item);
349
uprv_free(newElement);
353
return newElement->item;
359
/*----------------------------------------------------------------------*
361
* Add a static reference to the common data library *
362
* Unless overridden by an explicit u_setCommonData, this will be *
365
*----------------------------------------------------------------------*/
366
extern const DataHeader U_IMPORT U_ICUDATA_ENTRY_POINT;
369
/*----------------------------------------------------------------------*
371
* openCommonData Attempt to open a common format (.dat) file *
372
* Map it into memory (if it's not there already) *
373
* and return a UDataMemory object for it. *
375
* If the requested data is already open and cached *
376
* just return the cached UDataMem object. *
378
*----------------------------------------------------------------------*/
381
const char *path, /* Path from OpenCHoice? */
382
UBool isICUData, /* ICU Data true if path == NULL */
383
UErrorCode *pErrorCode)
385
const char *inBasename;
386
char *basename, *suffix;
387
char pathBuffer[1024];
390
if (U_FAILURE(*pErrorCode)) {
394
UDataMemory_init(&tData);
397
/* "mini-cache" for common ICU data */
398
if(gCommonICUData != NULL) {
399
return gCommonICUData;
402
tData.pHeader = &U_ICUDATA_ENTRY_POINT;
403
udata_checkCommonData(&tData, pErrorCode);
404
setCommonICUData(&tData, NULL, FALSE, pErrorCode);
405
return gCommonICUData;
409
/* request is NOT for ICU Data. */
411
/* Find the base name portion of the supplied path. */
412
/* inBasename will be left pointing somewhere within the original path string. */
413
inBasename=findBasename(path);
415
/* no basename. This will happen if the original path was a directory name, */
416
/* like "a/b/c/". (Fallback to separate files will still work.) */
417
*pErrorCode=U_FILE_ACCESS_ERROR;
421
/* Is the requested common data file already open and cached? */
422
/* Note that the cache is keyed by the base name only. The rest of the path, */
423
/* if any, is not considered. */
425
UDataMemory *dataToReturn = udata_findCachedData(inBasename);
426
if (dataToReturn != NULL) {
431
/* Requested item is not in the cache.
432
* Hunt it down, trying all the fall back locations.
435
/* try path/basename first, then basename only */
436
basename=uprv_computeDirPath(path, pathBuffer); /* pathBuffer = directory path */
437
suffix=strcpy_returnEnd(basename, inBasename); /* append the base name. */
438
uprv_strcpy(suffix, ".dat"); /* append ".dat" */
440
uprv_mapFile(&tData, pathBuffer);
442
if (!UDataMemory_isLoaded(&tData)) {
443
/* The data didn't open. Try again without the directory portion of the name */
444
if (basename!=pathBuffer) {
445
uprv_mapFile(&tData, basename);
449
if (!UDataMemory_isLoaded(&tData)) {
451
*pErrorCode=U_FILE_ACCESS_ERROR;
455
/* we have mapped a file, check its header */
456
udata_checkCommonData(&tData, pErrorCode);
459
/* Cache the UDataMemory struct for this .dat file,
460
* so we won't need to hunt it down and map it again next time
461
* something is needed from it. */
462
return udata_cacheDataItem(inBasename, &tData, pErrorCode);
467
# define MAX_STUB_ENTRIES 7
469
# define MAX_STUB_ENTRIES 0
473
/*----------------------------------------------------------------------*
475
* extendICUData If the full set of ICU data was not loaded at *
476
* program startup, load it now. This function will *
477
* be called when the lookup of an ICU data item in *
478
* the common ICU data fails. *
480
* The parameter is the UDataMemory in which the *
481
* search for a requested item failed. *
483
* return true if new data is loaded, false otherwise.*
485
*----------------------------------------------------------------------*/
486
static UBool extendICUData(UDataMemory *failedData, UErrorCode *pErr)
488
/* If the data library that we are running with turns out to be the
489
* stub library (or, on the 390, the subset library), we will try to
490
* load a .dat file instead. The stub library has no entries in its
491
* TOC, which is how we identify it here.
494
UDataMemory copyPData;
496
if (failedData->vFuncs->NumEntries(failedData) > MAX_STUB_ENTRIES) {
497
/* Not the stub. We can't extend. */
501
/* See if we can explicitly open a .dat file for the ICUData. */
502
pData = openCommonData(
503
U_ICUDATA_NAME, /* "icudt20l" , for example. */
504
FALSE, /* Pretend we're not opening ICUData */
507
/* How about if there is no pData, eh... */
509
UDataMemory_init(&copyPData);
511
UDatamemory_assign(&copyPData, pData);
512
copyPData.map = 0; /* The mapping for this data is owned by the hash table */
513
copyPData.mapAddr = 0; /* which will unmap it when ICU is shut down. */
514
/* CommonICUData is also unmapped when ICU is shut down.*/
515
/* To avoid unmapping the data twice, zero out the map */
516
/* fields in the UDataMemory that we're assigning */
517
/* to CommonICUData. */
519
setCommonICUData(&copyPData, /* The new common data. */
520
failedData, /* Old ICUData ptr. Overwrite of this value is ok, */
521
FALSE, /* No warnings if write didn't happen */
522
pErr); /* setCommonICUData honors errors; NOP if error set */
526
return gCommonICUData != failedData; /* Return true if ICUData pointer was updated. */
527
/* (Could potentially have been done by another thread racing */
528
/* us through here, but that's fine, we still return true */
529
/* so that current thread will also examine extended data. */
535
/*----------------------------------------------------------------------*
537
* udata_setCommonData *
539
*----------------------------------------------------------------------*/
540
U_CAPI void U_EXPORT2
541
udata_setCommonData(const void *data, UErrorCode *pErrorCode) {
542
UDataMemory dataMemory;
544
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
549
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
553
/* do we already have common ICU data set? */
554
if(gCommonICUData != NULL) {
555
*pErrorCode=U_USING_DEFAULT_ERROR;
559
/* set the data pointer and test for validity */
560
UDataMemory_init(&dataMemory);
561
UDataMemory_setData(&dataMemory, data);
562
udata_checkCommonData(&dataMemory, pErrorCode);
563
if (U_FAILURE(*pErrorCode)) {return;}
565
/* we have good data */
566
/* Set it up as the ICU Common Data. */
567
setCommonICUData(&dataMemory, NULL, TRUE, pErrorCode);
573
/*---------------------------------------------------------------------------
577
*---------------------------------------------------------------------------- */
578
U_CAPI void U_EXPORT2
579
udata_setAppData(const char *path, const void *data, UErrorCode *err)
583
if(err==NULL || U_FAILURE(*err)) {
587
*err=U_ILLEGAL_ARGUMENT_ERROR;
591
UDataMemory_init(&udm);
593
udata_checkCommonData(&udm, err);
594
udata_cacheDataItem(path, &udm, err);
597
/*----------------------------------------------------------------------------*
599
* checkDataItem Given a freshly located/loaded data item, either *
600
* an entry in a common file or a separately loaded file, *
601
* sanity check its header, and see if the data is *
602
* acceptable to the app. *
603
* If the data is good, create and return a UDataMemory *
604
* object that can be returned to the application. *
605
* Return NULL on any sort of failure. *
607
*----------------------------------------------------------------------------*/
611
const DataHeader *pHeader, /* The data item to be checked. */
612
UDataMemoryIsAcceptable *isAcceptable, /* App's call-back function */
613
void *context, /* pass-thru param for above. */
614
const char *type, /* pass-thru param for above. */
615
const char *name, /* pass-thru param for above. */
616
UErrorCode *nonFatalErr, /* Error code if this data was not acceptable */
617
/* but openChoice should continue with */
618
/* trying to get data from fallback path. */
619
UErrorCode *fatalErr /* Bad error, caller should return immediately */
622
UDataMemory *rDataMem = NULL; /* the new UDataMemory, to be returned. */
624
if (U_FAILURE(*fatalErr)) {
628
if(pHeader->dataHeader.magic1==0xda &&
629
pHeader->dataHeader.magic2==0x27 &&
630
pHeader->info.isBigEndian==U_IS_BIG_ENDIAN &&
631
(isAcceptable==NULL || isAcceptable(context, type, name, &pHeader->info))
633
rDataMem=UDataMemory_createNewInstance(fatalErr);
634
if (U_FAILURE(*fatalErr)) {
637
rDataMem->pHeader = pHeader;
639
/* the data is not acceptable, look further */
640
/* If we eventually find something good, this errorcode will be */
642
*nonFatalErr=U_INVALID_FORMAT_ERROR;
651
* A note on the ownership of Mapped Memory
653
* For common format files, ownership resides with the UDataMemory object
654
* that lives in the cache of opened common data. These UDataMemorys are private
655
* to the udata implementation, and are never seen directly by users.
657
* The UDataMemory objects returned to users will have the address of some desired
658
* data within the mapped region, but they won't have the mapping info itself, and thus
659
* won't cause anything to be removed from memory when they are closed.
661
* For individual data files, the UDataMemory returned to the user holds the
662
* information necessary to unmap the data on close. If the user independently
663
* opens the same data file twice, two completely independent mappings will be made.
664
* (There is no cache of opened data items from individual files, only a cache of
665
* opened Common Data files, that is, files containing a collection of data items.)
667
* For common data passed in from the user via udata_setAppData() or
668
* udata_setCommonData(), ownership remains with the user.
670
* UDataMemory objects themselves, as opposed to the memory they describe,
671
* can be anywhere - heap, stack/local or global.
672
* They have a flag to indicate when they're heap allocated and thus
673
* must be deleted when closed.
677
/*----------------------------------------------------------------------------*
679
* main data loading functions *
681
*----------------------------------------------------------------------------*/
683
doOpenChoice(const char *path, const char *type, const char *name,
684
UDataMemoryIsAcceptable *isAcceptable, void *context,
685
UErrorCode *pErrorCode)
687
char pathBuffer[1024];
688
char tocEntryName[100];
689
UDataMemory dataMemory;
690
UDataMemory *pCommonData;
691
UDataMemory *pEntryData;
692
const DataHeader *pHeader;
693
const char *inBasename;
696
UErrorCode errorCode=U_ZERO_ERROR;
697
UBool isICUData= (UBool)(path==NULL);
700
/* Make up a full name by appending the type to the supplied
701
* name, assuming that a type was supplied.
703
uprv_strcpy(tocEntryName, name);
704
if(type!=NULL && *type!=0) {
705
uprv_strcat(tocEntryName, ".");
706
uprv_strcat(tocEntryName, type);
709
/* try to get common data. The loop is for platforms such as the 390 that do
710
* not initially load the full set of ICU data. If the lookup of an ICU data item
711
* fails, the full (but slower to load) set is loaded, and then the loop repeats,
712
* trying the lookup again. Once the full set of ICU data is loaded, the loop won't
713
* repeat because the full set will be checked the first time through.
715
* The loop also handles the fallback to a .dat file if the application linked
716
* to the stub data library rather than a real library.
719
pCommonData=openCommonData(path, isICUData, &errorCode);
721
if(U_SUCCESS(errorCode)) {
722
/* look up the data piece in the common data */
723
pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &errorCode);
725
pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, &errorCode, pErrorCode);
726
if (U_FAILURE(*pErrorCode)) {
729
if (pEntryData != NULL) {
734
/* Data wasn't found. If we were looking for an ICUData item and there is
735
* more data available, load it and try again,
736
* otherwise break out of this loop. */
737
if (!(isICUData && extendICUData(pCommonData, &errorCode))) {
743
/* the data was not found in the common data, look further, */
744
/* try to get an individual data file */
745
basename=uprv_computeDirPath(path, pathBuffer);
747
inBasename=COMMON_DATA_NAME;
749
inBasename=findBasename(path);
753
fprintf(stderr, "looking for ind. file\n");
756
/* try path+basename+"_"+entryName first */
758
suffix=strcpy_returnEnd(basename, inBasename);
760
uprv_strcpy(suffix, tocEntryName);
762
if( uprv_mapFile(&dataMemory, pathBuffer) ||
763
(basename!=pathBuffer && uprv_mapFile(&dataMemory, basename)))
765
/* We mapped a file. Check out its contents. */
766
pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, &errorCode, pErrorCode);
767
if (pEntryData != NULL)
770
* Hand off ownership of the backing memory to the user's UDataMemory.
772
pEntryData->mapAddr = dataMemory.mapAddr;
773
pEntryData->map = dataMemory.map;
777
/* the data is not acceptable, or some error occurred. Either way, unmap the memory */
778
udata_close(&dataMemory);
780
/* If we had a nasty error, bail out completely. */
781
if (U_FAILURE(*pErrorCode)) {
785
/* Otherwise remember that we found data but didn't like it for some reason,
786
* and continue looking
788
errorCode=U_INVALID_FORMAT_ERROR;
792
/* try path+entryName next */
793
uprv_strcpy(basename, tocEntryName);
794
if( uprv_mapFile(&dataMemory, pathBuffer) ||
795
(basename!=pathBuffer && uprv_mapFile(&dataMemory, basename)))
797
pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, &errorCode, pErrorCode);
798
if (pEntryData != NULL) {
800
* Hand off ownership of the backing memory to the user's UDataMemory.
802
pEntryData->mapAddr = dataMemory.mapAddr;
803
pEntryData->map = dataMemory.map;
807
/* the data is not acceptable, or some error occurred. Either way, unmap the memory */
808
udata_close(&dataMemory);
810
/* If we had a nasty error, bail out completely. */
811
if (U_FAILURE(*pErrorCode)) {
815
/* Otherwise remember that we found data but didn't like it for some reason */
816
errorCode=U_INVALID_FORMAT_ERROR;
820
if(U_SUCCESS(*pErrorCode)) {
821
if(U_SUCCESS(errorCode)) {
823
*pErrorCode=U_FILE_ACCESS_ERROR;
825
/* entry point not found or rejected */
826
*pErrorCode=errorCode;
834
/* API ---------------------------------------------------------------------- */
836
U_CAPI UDataMemory * U_EXPORT2
837
udata_open(const char *path, const char *type, const char *name,
838
UErrorCode *pErrorCode) {
840
fprintf(stderr, "udata_open(): Opening: %s . %s\n", name, type);
844
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
846
} else if(name==NULL || *name==0) {
847
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
850
return doOpenChoice(path, type, name, NULL, NULL, pErrorCode);
856
U_CAPI UDataMemory * U_EXPORT2
857
udata_openChoice(const char *path, const char *type, const char *name,
858
UDataMemoryIsAcceptable *isAcceptable, void *context,
859
UErrorCode *pErrorCode) {
861
fprintf(stderr, "udata_openChoice(): Opening: %s . %s\n", name, type);fflush(stderr);
864
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
866
} else if(name==NULL || *name==0 || isAcceptable==NULL) {
867
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
870
return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode);
876
U_CAPI void U_EXPORT2
877
udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
879
if(pData!=NULL && pData->pHeader!=NULL) {
880
const UDataInfo *info=&pData->pHeader->info;
881
if(pInfo->size>info->size) {
882
pInfo->size=info->size;
884
uprv_memcpy((uint16_t *)pInfo+1, (uint16_t *)info+1, pInfo->size-2);