2
*******************************************************************************
3
* Copyright (C) 2006-2010, International Business Machines Corporation and *
4
* others. All Rights Reserved. *
5
*******************************************************************************
7
*******************************************************************************
9
package com.ibm.icu.charset;
11
import java.io.BufferedInputStream;
12
import java.io.IOException;
13
import java.io.InputStream;
14
import java.nio.ByteBuffer;
16
import com.ibm.icu.impl.ICUData;
17
import com.ibm.icu.impl.ICUResourceBundle;
19
final class UConverterAlias {
20
static final int UNNORMALIZED = 0;
22
static final int STD_NORMALIZED = 1;
24
static final int AMBIGUOUS_ALIAS_MAP_BIT = 0x8000;
26
static final int CONTAINS_OPTION_BIT = 0x4000;
28
static final int CONVERTER_INDEX_MASK = 0xFFF;
30
static final int NUM_RESERVED_TAGS = 2;
32
static final int NUM_HIDDEN_TAGS = 1;
34
static int[] gConverterList = null;
36
static int[] gTagList = null;
38
static int[] gAliasList = null;
40
static int[] gUntaggedConvArray = null;
42
static int[] gTaggedAliasArray = null;
44
static int[] gTaggedAliasLists = null;
46
static int[] gOptionTable = null;
48
static byte[] gStringTable = null;
50
static byte[] gNormalizedStringTable = null;
52
static final String GET_STRING(int idx) {
53
return extractString(gStringTable, 2 * idx);
56
private static final String GET_NORMALIZED_STRING(int idx) {
57
return extractString(gNormalizedStringTable, 2 * idx);
60
private static final String extractString(byte[] sArray, int sBegin) {
61
char[] buf = new char[strlen(sArray, sBegin)];
62
for (int i = 0; i < buf.length; i++) {
63
buf[i] = (char)(sArray[sBegin + i] & 0xff);
65
return new String(buf);
68
private static final int strlen(byte[] sArray, int sBegin)
71
while(i < sArray.length && sArray[i++] != 0) {}
72
return i - sBegin - 1;
75
/*private*/ static final int tocLengthIndex = 0;
77
private static final int converterListIndex = 1;
79
private static final int tagListIndex = 2;
81
private static final int aliasListIndex = 3;
83
private static final int untaggedConvArrayIndex = 4;
85
private static final int taggedAliasArrayIndex = 5;
87
private static final int taggedAliasListsIndex = 6;
89
private static final int optionTableIndex = 7;
91
private static final int stringTableIndex = 8;
93
private static final int normalizedStringTableIndex = 9;
95
private static final int minTocLength = 9; /*
96
* min. tocLength in the file,
101
private static final int offsetsCount = minTocLength + 1; /*
108
static ByteBuffer gAliasData = null;
110
private static final boolean isAlias(String alias) {
112
throw new IllegalArgumentException("Alias param is null!");
114
return (alias.length() != 0);
117
private static final String CNVALIAS_DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE + "/cnvalias.icu";
120
* Default buffer size of datafile
122
private static final int CNVALIAS_DATA_BUFFER_SIZE = 25000;
124
private static final synchronized boolean haveAliasData()
128
// agljport:todo umtx_lock(NULL);
129
needInit = gAliasData == null;
131
/* load converter alias data from file if necessary */
133
ByteBuffer data = null;
134
int[] tableArray = null;
136
//byte[] reservedBytes = null;
138
InputStream i = ICUData.getRequiredStream(CNVALIAS_DATA_FILE_NAME);
139
BufferedInputStream b = new BufferedInputStream(i, CNVALIAS_DATA_BUFFER_SIZE);
140
UConverterAliasDataReader reader = new UConverterAliasDataReader(b);
141
tableArray = reader.readToc(offsetsCount);
143
tableStart = tableArray[0];
144
if (tableStart < minTocLength) {
145
throw new IOException("Invalid data format.");
147
gConverterList = new int[tableArray[converterListIndex]];
148
gTagList= new int[tableArray[tagListIndex]];
149
gAliasList = new int[tableArray[aliasListIndex]];
150
gUntaggedConvArray = new int[tableArray[untaggedConvArrayIndex]];
151
gTaggedAliasArray = new int[tableArray[taggedAliasArrayIndex]];
152
gTaggedAliasLists = new int[tableArray[taggedAliasListsIndex]];
153
gOptionTable = new int[tableArray[optionTableIndex]];
154
gStringTable = new byte[tableArray[stringTableIndex]*2];
155
gNormalizedStringTable = new byte[tableArray[normalizedStringTableIndex]*2];
157
reader.read(gConverterList, gTagList,
158
gAliasList, gUntaggedConvArray,
159
gTaggedAliasArray, gTaggedAliasLists,
160
gOptionTable, gStringTable, gNormalizedStringTable);
161
data = ByteBuffer.allocate(0); // dummy UDataMemory object in absence
164
if (gOptionTable[0] != STD_NORMALIZED) {
165
throw new IOException("Unsupported alias normalization");
168
// agljport:todo umtx_lock(NULL);
169
if (gAliasData == null) {
173
// agljport:fix ucln_common_registerCleanup(UCLN_COMMON_IO,
176
// agljport:todo umtx_unlock(NULL);
178
/* if a different thread set it first, then close the extra data */
180
// agljport:fix udata_close(data); /* NULL if it was set
188
// U_CFUNC const char * io_getConverterName(const char *alias, UErrorCode
190
// public static final String io_getConverterName(String alias)
191
// throws IOException{
192
// if (haveAliasData() && isAlias(alias)) {
193
// boolean[] isAmbigous = new boolean[1];
194
// int convNum = findConverter(alias, isAmbigous);
195
// if (convNum < gConverterList.length) {
196
// return GET_STRING(gConverterList[(int) convNum]);
198
// /* else converter not found */
204
* search for an alias return the converter number index for gConverterList
206
// static U_INLINE uint32_t findConverter(const char *alias, UErrorCode
208
private static final int findConverter(String alias, boolean[] isAmbigous) {
209
int mid, start, limit;
212
StringBuilder strippedName = new StringBuilder();
213
String aliasToCompare;
215
stripForCompare(strippedName, alias);
216
alias = strippedName.toString();
218
/* do a binary search for the alias */
220
limit = gUntaggedConvArray.length;
222
lastMid = Integer.MAX_VALUE;
225
mid = (start + limit) / 2;
226
if (lastMid == mid) { /* Have we moved? */
227
break; /* We haven't moved, and it wasn't found. */
230
aliasToCompare = GET_NORMALIZED_STRING(gAliasList[mid]);
231
result = alias.compareTo(aliasToCompare);
235
} else if (result > 0) {
239
* Since the gencnval tool folds duplicates into one entry, this
240
* alias in gAliasList is unique, but different standards may
241
* map an alias to different converters.
243
if ((gUntaggedConvArray[mid] & AMBIGUOUS_ALIAS_MAP_BIT) != 0) {
246
/* State whether the canonical converter name contains an option.
247
This information is contained in this list in order to maintain backward & forward compatibility. */
248
/*if (containsOption) {
249
UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
250
*containsOption = (UBool)((containsCnvOptionInfo
251
&& ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
252
|| !containsCnvOptionInfo);
254
return gUntaggedConvArray[mid] & CONVERTER_INDEX_MASK;
257
return Integer.MAX_VALUE;
261
* stripForCompare Remove the underscores, dashes and spaces from
262
* the name, and convert the name to lower case.
264
* @param dst The destination buffer, which is <= the buffer of name.
265
* @param name The alias to strip
266
* @return the destination buffer.
268
public static final StringBuilder stripForCompare(StringBuilder dst, String name) {
269
return io_stripASCIIForCompare(dst, name);
273
/* Character classes used when names are stripped or compared. */

/** Character is skipped entirely. */
private static final byte IGNORE = 0;

/** The digit '0'. */
private static final byte ZERO = 1;

/** One of the digits '1'..'9'. */
private static final byte NONZERO = 2;

/** any values from here on are lowercase letter mappings */
static final byte MINLETTER = 3;

/*
 * character types for ASCII 00..7F
 *
 * Digits map to ZERO/NONZERO, letters of either case map to their lowercase
 * code (0x61..0x7a), and everything else maps to 0 (IGNORE).
 */
static final byte asciiTypes[] = new byte[] {
    /* 00..0F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10..1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 20..2F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 30..3F: '0'..'9' */
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    /* 40..4F: 'A'..'O' */
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 50..5F: 'P'..'Z' */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    /* 60..6F: 'a'..'o' */
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 70..7F: 'p'..'z' */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};
291
private static final char GET_CHAR_TYPE(char c) {
292
return (char)((c < asciiTypes.length) ? asciiTypes[c] : (char)IGNORE);
295
/** @see UConverterAlias#compareNames */
296
private static final StringBuilder io_stripASCIIForCompare(StringBuilder dst, String name) {
300
boolean afterDigit = false;
302
while (nameIndex < name.length()) {
303
c1 = name.charAt(nameIndex++);
304
type = GET_CHAR_TYPE(c1);
308
continue; /* ignore all but letters and digits */
310
if (!afterDigit && nameIndex < name.length()) {
311
nextType = GET_CHAR_TYPE(name.charAt(nameIndex));
312
if (nextType == ZERO || nextType == NONZERO) {
313
continue; /* ignore leading zero before another digit */
321
c1 = type; /* lowercased letter */
331
* Do a fuzzy compare of a two converter/alias names. The comparison is
332
* case-insensitive. It also ignores the characters '-', '_', and ' ' (dash,
333
* underscore, and space). Thus the strings "UTF-8", "utf_8", and "Utf 8"
334
* are exactly equivalent.
336
* This is a symmetrical (commutative) operation; order of arguments is
337
* insignificant. This is an important property for sorting the list (when
338
* the list is preprocessed into binary form) and for performing binary
339
* searches on it at run time.
342
* a converter name or alias, zero-terminated
344
* a converter name or alias, zero-terminated
345
* @return 0 if the names match, or a negative value if the name1 lexically
346
* precedes name2, or a positive value if the name1 lexically
349
* @see UConverterAlias#stripForCompare
351
static int compareNames(String name1, String name2){
352
int rc, name1Index = 0, name2Index = 0;
355
boolean afterDigit1 = false, afterDigit2 = false;
358
while (name1Index < name1.length()) {
359
c1 = name1.charAt(name1Index++);
360
type = GET_CHAR_TYPE(c1);
364
continue; /* ignore all but letters and digits */
366
if (!afterDigit1 && name1Index < name1.length()) {
367
nextType = GET_CHAR_TYPE(name1.charAt(name1Index));
368
if (nextType == ZERO || nextType == NONZERO) {
369
continue; /* ignore leading zero before another digit */
377
c1 = type; /* lowercased letter */
381
break; /* deliver c1 */
383
while (name2Index < name2.length()) {
384
c2 = name2.charAt(name2Index++);
385
type = GET_CHAR_TYPE(c2);
389
continue; /* ignore all but letters and digits */
391
if (!afterDigit2 && name1Index < name1.length()) {
392
nextType = GET_CHAR_TYPE(name2.charAt(name2Index));
393
if (nextType == ZERO || nextType == NONZERO) {
394
continue; /* ignore leading zero before another digit */
402
c2 = type; /* lowercased letter */
406
break; /* deliver c2 */
409
/* If we reach the ends of both strings then they match */
410
if (name1Index >= name1.length() && name2Index >= name2.length()) {
414
/* Case-insensitive comparison */
415
rc = (int)c1 - (int)c2;
422
static int io_countAliases(String alias)
424
if (haveAliasData() && isAlias(alias)) {
425
boolean[] isAmbigous = new boolean[1];
426
int convNum = findConverter(alias, isAmbigous);
427
if (convNum < gConverterList.length) {
428
/* tagListNum - 1 is the ALL tag */
429
int listOffset = gTaggedAliasArray[(gTagList.length - 1)
430
* gConverterList.length + convNum];
432
if (listOffset != 0) {
433
return gTaggedAliasLists[listOffset];
435
/* else this shouldn't happen. internal program error */
437
/* else converter not found */
443
* Return the number of all aliases (and converter names).
445
* @return the number of all aliases
447
// U_CFUNC uint16_t io_countTotalAliases(UErrorCode *pErrorCode);
448
// static int io_countTotalAliases() throws IOException{
449
// if (haveAliasData()) {
450
// return (int) gAliasList.length;
455
// U_CFUNC const char * io_getAlias(const char *alias, uint16_t n,
456
// UErrorCode *pErrorCode)
457
static String io_getAlias(String alias, int n) throws IOException{
458
if (haveAliasData() && isAlias(alias)) {
459
boolean[] isAmbigous = new boolean[1];
460
int convNum = findConverter(alias,isAmbigous);
461
if (convNum < gConverterList.length) {
462
/* tagListNum - 1 is the ALL tag */
463
int listOffset = gTaggedAliasArray[(gTagList.length - 1)
464
* gConverterList.length + convNum];
466
if (listOffset != 0) {
467
//int listCount = gTaggedAliasListsArray[listOffset];
468
/* +1 to skip listCount */
469
int[] currListArray = gTaggedAliasLists;
470
int currListArrayIndex = listOffset + 1;
472
return GET_STRING(currListArray[currListArrayIndex + n]);
475
/* else this shouldn't happen. internal program error */
477
/* else converter not found */
482
// U_CFUNC uint16_t io_countStandards(UErrorCode *pErrorCode) {
483
// static int io_countStandards() throws IOException{
484
// if (haveAliasData()) {
485
// return (int) (gTagList.length - NUM_HIDDEN_TAGS);
490
// U_CAPI const char * U_EXPORT2getStandard(uint16_t n, UErrorCode
492
// static String getStandard(int n) throws IOException{
493
// if (haveAliasData()) {
494
// return GET_STRING(gTagList[n]);
499
// U_CAPI const char * U_EXPORT2 getStandardName(const char *alias, const
500
// char *standard, UErrorCode *pErrorCode)
501
static final String getStandardName(String alias, String standard)throws IOException {
502
if (haveAliasData() && isAlias(alias)) {
503
int listOffset = findTaggedAliasListsOffset(alias, standard);
505
if (0 < listOffset && listOffset < gTaggedAliasLists.length) {
506
int[] currListArray = gTaggedAliasLists;
507
int currListArrayIndex = listOffset + 1;
508
if (currListArray[0] != 0) {
509
return GET_STRING(currListArray[currListArrayIndex]);
516
// U_CAPI uint16_t U_EXPORT2 countAliases(const char *alias, UErrorCode
518
static int countAliases(String alias) throws IOException{
519
return io_countAliases(alias);
522
// U_CAPI const char* U_EXPORT2 getAlias(const char *alias, uint16_t n,
523
// UErrorCode *pErrorCode)
524
static String getAlias(String alias, int n) throws IOException{
525
return io_getAlias(alias, n);
528
// U_CFUNC uint16_t countStandards(void)
529
// static int countStandards()throws IOException{
530
// return io_countStandards();
533
/*returns a single Name from the list, will return NULL if out of bounds
535
static String getAvailableName (int n){
537
if (0 <= n && n <= 0xffff) {
538
String name = bld_getAvailableConverter(n);
541
}catch(IOException ex){
542
//throw away exception
546
// U_CAPI const char * U_EXPORT2 getCanonicalName(const char *alias, const
547
// char *standard, UErrorCode *pErrorCode) {
548
static String getCanonicalName(String alias, String standard) throws IOException{
549
if (haveAliasData() && isAlias(alias)) {
550
int convNum = findTaggedConverterNum(alias, standard);
552
if (convNum < gConverterList.length) {
553
return GET_STRING(gConverterList[convNum]);
559
static int countAvailable (){
561
return bld_countAvailableConverters();
562
}catch(IOException ex){
563
//throw away exception
568
// U_CAPI UEnumeration * U_EXPORT2 openStandardNames(const char *convName,
569
// const char *standard, UErrorCode *pErrorCode)
570
/* static final UConverterAliasesEnumeration openStandardNames(String convName, String standard)throws IOException {
571
UConverterAliasesEnumeration aliasEnum = null;
572
if (haveAliasData() && isAlias(convName)) {
573
int listOffset = findTaggedAliasListsOffset(convName, standard);
576
* When listOffset == 0, we want to acknowledge that the converter
577
* name and standard are okay, but there is nothing to enumerate.
579
if (listOffset < gTaggedAliasLists.length) {
581
UConverterAliasesEnumeration.UAliasContext context = new UConverterAliasesEnumeration.UAliasContext(listOffset, 0);
582
aliasEnum = new UConverterAliasesEnumeration();
583
aliasEnum.setContext(context);
585
else converter or tag not found
590
// static uint32_t getTagNumber(const char *tagname)
591
private static int getTagNumber(String tagName) {
592
if (gTagList != null) {
594
for (tagNum = 0; tagNum < gTagList.length; tagNum++) {
595
if (tagName.equals(GET_STRING(gTagList[tagNum]))) {
601
return Integer.MAX_VALUE;
604
// static uint32_t findTaggedAliasListsOffset(const char *alias, const char
605
// *standard, UErrorCode *pErrorCode)
606
private static int findTaggedAliasListsOffset(String alias, String standard) {
610
int tagNum = getTagNumber(standard);
611
boolean[] isAmbigous = new boolean[1];
612
/* Make a quick guess. Hopefully they used a TR22 canonical alias. */
613
convNum = findConverter(alias, isAmbigous);
615
if (tagNum < (gTagList.length - NUM_HIDDEN_TAGS)
616
&& convNum < gConverterList.length) {
617
listOffset = gTaggedAliasArray[tagNum
618
* gConverterList.length + convNum];
620
&& gTaggedAliasLists[listOffset + 1] != 0) {
623
if (isAmbigous[0]==true) {
625
* Uh Oh! They used an ambiguous alias. We have to search the
626
* whole swiss cheese starting at the highest standard affinity.
627
* This may take a while.
630
for (idx = 0; idx < gTaggedAliasArray.length; idx++) {
631
listOffset = gTaggedAliasArray[idx];
632
if (listOffset != 0 && isAliasInList(alias, listOffset)) {
633
int currTagNum = idx / gConverterList.length;
634
int currConvNum = (idx - currTagNum
635
* gConverterList.length);
636
int tempListOffset = gTaggedAliasArray[tagNum
637
* gConverterList.length + currConvNum];
638
if (tempListOffset != 0
639
&& gTaggedAliasLists[tempListOffset + 1] != 0) {
640
return tempListOffset;
643
* else keep on looking We could speed this up by
644
* starting on the next row because an alias is unique
645
* per row, right now. This would change if alias
646
* versioning appears.
650
/* The standard doesn't know about the alias */
652
/* else no default name */
655
/* else converter or tag not found */
657
return Integer.MAX_VALUE;
660
/* Return the canonical name */
661
// static uint32_t findTaggedConverterNum(const char *alias, const char
662
// *standard, UErrorCode *pErrorCode)
663
private static int findTaggedConverterNum(String alias, String standard) {
667
int tagNum = getTagNumber(standard);
668
boolean[] isAmbigous = new boolean[1];
670
/* Make a quick guess. Hopefully they used a TR22 canonical alias. */
671
convNum = findConverter(alias, isAmbigous);
673
if (tagNum < (gTagList.length - NUM_HIDDEN_TAGS)
674
&& convNum < gConverterList.length) {
675
listOffset = gTaggedAliasArray[tagNum
676
* gConverterList.length + convNum];
677
if (listOffset != 0 && isAliasInList(alias, listOffset)) {
680
if (isAmbigous[0] == true) {
682
* Uh Oh! They used an ambiguous alias. We have to search one
683
* slice of the swiss cheese. We search only in the requested
684
* tag, not the whole thing. This may take a while.
686
int convStart = (tagNum) * gConverterList.length;
687
int convLimit = (tagNum + 1) * gConverterList.length;
688
for (idx = convStart; idx < convLimit; idx++) {
689
listOffset = gTaggedAliasArray[idx];
690
if (listOffset != 0 && isAliasInList(alias, listOffset)) {
691
return idx - convStart;
694
/* The standard doesn't know about the alias */
696
/* else no canonical name */
698
/* else converter or tag not found */
700
return Integer.MAX_VALUE;
703
// static U_INLINE UBool isAliasInList(const char *alias, uint32_t
705
private static boolean isAliasInList(String alias, int listOffset) {
706
if (listOffset != 0) {
708
int listCount = gTaggedAliasLists[listOffset];
709
/* +1 to skip listCount */
710
int[] currList = gTaggedAliasLists;
711
int currListArrayIndex = listOffset + 1;
712
for (currAlias = 0; currAlias < listCount; currAlias++) {
713
if (currList[currAlias + currListArrayIndex] != 0
716
GET_STRING(currList[currAlias + currListArrayIndex])) == 0) {
725
static String[] gAvailableConverters = null;
727
static int gAvailableConverterCount = 0;
729
static byte[] gDefaultConverterNameBuffer; // [MAX_CONVERTER_NAME_LENGTH +
730
// 1]; /* +1 for NULL */
732
static String gDefaultConverterName = null;
734
// static UBool haveAvailableConverterList(UErrorCode *pErrorCode)
735
static boolean haveAvailableConverterList() throws IOException{
736
if (gAvailableConverters == null) {
738
int localConverterCount;
739
String converterName;
740
String[] localConverterList;
742
if (!haveAliasData()) {
746
/* We can't have more than "*converterTable" converters to open */
747
localConverterList = new String[gConverterList.length];
749
localConverterCount = 0;
751
for (idx = 0; idx < gConverterList.length; idx++) {
752
converterName = GET_STRING(gConverterList[idx]);
753
//UConverter cnv = UConverter.open(converterName);
755
localConverterList[localConverterCount++] = converterName;
759
// agljport:todo umtx_lock(NULL);
760
if (gAvailableConverters == null) {
761
gAvailableConverters = localConverterList;
762
gAvailableConverterCount = localConverterCount;
763
/* haveData should have already registered the cleanup function */
765
// agljport:todo free((char **)localConverterList);
767
// agljport:todo umtx_unlock(NULL);
772
// U_CFUNC uint16_t bld_countAvailableConverters(UErrorCode *pErrorCode)
773
static int bld_countAvailableConverters() throws IOException{
774
if (haveAvailableConverterList()) {
775
return gAvailableConverterCount;
780
// U_CFUNC const char * bld_getAvailableConverter(uint16_t n, UErrorCode
782
static String bld_getAvailableConverter(int n) throws IOException{
783
if (haveAvailableConverterList()) {
784
if (n < gAvailableConverterCount) {
785
return gAvailableConverters[n];
791
/* default converter name --------------------------------------------------- */
794
* In order to be really thread-safe, the get function would have to take
795
* a buffer parameter and copy the current string inside a mutex block.
796
* This implementation only tries to be really thread-safe while
798
* It assumes that setting a pointer is atomic.
801
// U_CFUNC const char * getDefaultName()
802
// static final synchronized String getDefaultName() {
803
// /* local variable to be thread-safe */
806
// //agljport:todo umtx_lock(null);
807
// name = gDefaultConverterName;
808
// //agljport:todo umtx_unlock(null);
810
// if (name == null) {
811
// //UConverter cnv = null;
814
// name = CharsetICU.getDefaultCharsetName();
816
// /* if the name is there, test it out and get the canonical name with options */
817
// if (name != null) {
818
// // cnv = UConverter.open(name);
819
// // name = cnv.getName(cnv);
823
// if (name == null || name.length() == 0 ||/* cnv == null ||*/
824
// length >= gDefaultConverterNameBuffer.length) {
825
// /* Panic time, let's use a fallback. */
826
// name = new String("US-ASCII");
829
// //length=(int32_t)(strlen(name));
831
// /* Copy the name before we close the converter. */
832
// name = gDefaultConverterName;
b'\\ No newline at end of file'