~ubuntu-branches/ubuntu/gutsy/icu/gutsy-updates

« back to all changes in this revision

Viewing changes to source/common/uscript.c

  • Committer: Package Import Robot
  • Author(s): Jay Berkenbilt
  • Date: 2005-11-19 11:29:31 UTC
  • mfrom: (1.1.2)
  • Revision ID: package-import@ubuntu.com-20051119112931-vcizkrp10tli4enw
Tags: 3.4-3
Explicitly build with g++ 3.4.  The current ICU fails its test suite
with 4.0 but not with 3.4.  Future versions should work properly with
4.0.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
**********************************************************************
*   Copyright (C) 1997-2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File USCRIPT.C
*
* Modification History:
*
*   Date        Name        Description
*   07/06/2001    Ram         Creation.
******************************************************************************
*/
15
 
 
16
 
#include "unicode/uscript.h"
17
 
#include "unicode/ures.h"
18
 
#include "cstring.h"
19
 
 
20
 
/* NOTE(review): USCRIPT_CODE_ARRAY_SIZE is not referenced by any code visible
 * in this file - confirm whether it is still needed. */
#define USCRIPT_CODE_ARRAY_SIZE 440

/* Resource-bundle key under which a locale lists its script abbreviations;
 * passed to ures_getByKey() by uscript_getCode(). */
static const char kLocaleScript[] = "LocaleScript";
23
 
 
24
 
static const char * const scriptNames[]={
25
 
        "ARABIC",               /* USCRIPT_ARABIC     */
26
 
        "ARMENIAN",             /* USCRIPT_ARMENIAN   */
27
 
        "BENGALI",              /* USCRIPT_BENGALI    */
28
 
        "BOPOMOFO",             /* USCRIPT_BOPOMOFO   */
29
 
        "CANADIAN-ABORIGINAL",  /* USCRIPT_UCAS       */
30
 
        "CHEROKEE",             /* USCRIPT_CHEROKEE   */
31
 
        "COMMON",               /* USCRIPT_COMMON     */
32
 
        "CYRILLIC",             /* USCRIPT_CYRILLIC   */
33
 
        "DESERET",              /* USCRIPT_DESERET    */
34
 
        "DEVANAGARI",           /* USCRIPT_DEVANAGARI */
35
 
        "ETHIOPIC",             /* USCRIPT_ETHIOPIC   */
36
 
        "GEORGIAN",             /* USCRIPT_GEORGIAN   */
37
 
        "GOTHIC",               /* USCRIPT_GOTHIC     */
38
 
        "GREEK",                /* USCRIPT_GREEK      */
39
 
        "GUJARATI",             /* USCRIPT_GUJARATI   */
40
 
        "GURMUKHI",             /* USCRIPT_GURMUKHI   */
41
 
        "HAN",                  /* USCRIPT_HAN        */
42
 
        "HANGUL",               /* USCRIPT_HANGUL     */
43
 
        "HEBREW",               /* USCRIPT_HEBREW     */
44
 
        "HIRAGANA",             /* USCRIPT_HIRAGANA   */
45
 
        "INHERITED",            /* USCRIPT_INHERITED  */
46
 
        "KANNADA",              /* USCRIPT_KANNADA    */
47
 
        "KATAKANA",             /* USCRIPT_KATAKANA   */
48
 
        "KHMER",                /* USCRIPT_KHMER      */
49
 
        "LATIN",                /* USCRIPT_LATIN      */
50
 
        "MALAYALAM",            /* USCRIPT_MALAYALAM  */
51
 
        "MONGOLIAN",            /* USCRIPT_MONGOLIAN  */
52
 
        "MYANMAR",              /* USCRIPT_MYANMAR    */
53
 
        "OGHAM",                /* USCRIPT_OGHAM      */
54
 
        "OLD-ITALIC",           /* USCRIPT_OLD_ITALIC */
55
 
        "ORIYA",                /* USCRIPT_ORIYA      */
56
 
        "RUNIC",                /* USCRIPT_RUNIC      */
57
 
        "SINHALA",              /* USCRIPT_SINHALA    */
58
 
        "SYRIAC",               /* USCRIPT_SYRIAC     */
59
 
        "TAMIL",                /* USCRIPT_TAMIL      */
60
 
        "TELUGU",               /* USCRIPT_TELUGU     */
61
 
        "THAANA",               /* USCRIPT_THANA      */
62
 
        "THAI",                 /* USCRIPT_THAI       */
63
 
        "TIBETAN",              /* USCRIPT_TIBETAN    */
64
 
        "UCAS",                 /* USCRIPT_UCAS       */
65
 
        "YI"                    /* USCRIPT_YI         */
66
 
};
67
 
 
68
 
static const char * const scriptAbbr[]= {
69
 
        "Arab",       /* USCRIPT_ARABIC     */
70
 
        "Armn",       /* USCRIPT_ARMENIAN   */
71
 
        "Beng",       /* USCRIPT_BENGALI    */
72
 
        "Bopo",       /* USCRIPT_BOPOMOFO   */
73
 
        "Cans",       /* USCRIPT_UCAS       */
74
 
        "Cher",       /* USCRIPT_CHEROKEE   */
75
 
        "Cyrl",       /* USCRIPT_CYRILLIC   */
76
 
       /* "Cyrs",  */ /* USCRIPT_CYRILLIC   */
77
 
        "Deva",       /* USCRIPT_DEVANAGARI */
78
 
        "Dsrt",       /* USCRIPT_DESERET    */
79
 
        "Ethi",       /* USCRIPT_ETHIOPIC   */
80
 
       /* "Geoa",  */ /* USCRIPT_GEORGIAN   */
81
 
       /* "Geon",  */ /* USCRIPT_GEORGIAN   */
82
 
        "Geor",       /* USCRIPT_GEORGIAN   */
83
 
        "Goth",       /* USCRIPT_GOTHIC     */
84
 
        "Grek",       /* USCRIPT_GREEK      */
85
 
        "Gujr",       /* USCRIPT_GUJARATI   */
86
 
        "Guru",       /* USCRIPT_GURMUKHI   */
87
 
        "Hang",       /* USCRIPT_HANGUL     */
88
 
        "Hani",       /* USCRIPT_HAN        */
89
 
        "Hebr",       /* USCRIPT_HEBREW     */
90
 
        "Hira",       /* USCRIPT_HIRAGANA   */
91
 
        "Ital",       /* USCRIPT_OLD_ITALIC */
92
 
        "Kana",       /* USCRIPT_KATAKANA   */
93
 
        "Khmr",       /* USCRIPT_KHMER      */
94
 
        "Knda",       /* USCRIPT_KANNADA    */
95
 
        "Lao",        /* USCRIPT_LAO        */
96
 
        /*"Laoo",  */ /* USCRIPT_LAO        */
97
 
        /*"Latf",  */ /* USCRIPT_LATIN      */
98
 
        /*"Latg",  */ /* USCRIPT_LATIN      */
99
 
        "Latn",       /* USCRIPT_LATIN      */
100
 
        "Mlym",       /* USCRIPT_MALAYALAM  */
101
 
        "Mong",       /* USCRIPT_MONGOLIAN  */
102
 
        "Mymr",       /* USCRIPT_MYANMAR    */
103
 
        "Ogam",       /* USCRIPT_OGHAM      */
104
 
        "Orya",       /* USCRIPT_ORIYA      */
105
 
        "Qaac",       /* USCRIPT_COPTIC     */
106
 
        "Qaai",       /* USCRIPT_INHERITED  */
107
 
        "Runr",       /* USCRIPT_RUNIC      */
108
 
        "Sinh",       /* USCRIPT_SINHALA    */
109
 
        "Syrc",       /* USCRIPT_SYRIAC     */
110
 
       /* "Syre",  */ /* USCRIPT_SYRIAC     */
111
 
       /* "Syrj",  */ /* USCRIPT_SYRIAC     */
112
 
       /* "Syrn",  */ /* USCRIPT_SYRIAC     */
113
 
        "Taml",       /* USCRIPT_TAMIL      */
114
 
        "Telu",       /* USCRIPT_TELUGU     */
115
 
        "Thaa",       /* USCRIPT_THANA      */
116
 
        "Thai",       /* USCRIPT_THAI       */
117
 
        "Tibt",       /* USCRIPT_TIBETAN    */
118
 
        "Yiii",       /* USCRIPT_YI         */
119
 
        "Zyyy"        /* USCRIPT_COMMON     */    
120
 
};
121
 
 
122
 
 
123
 
static const UScriptCode scriptNameCodes[]= {
124
 
         USCRIPT_ARABIC     ,
125
 
         USCRIPT_ARMENIAN   ,
126
 
         USCRIPT_BENGALI    ,
127
 
         USCRIPT_BOPOMOFO   ,
128
 
         USCRIPT_UCAS       ,
129
 
         USCRIPT_CHEROKEE   ,
130
 
         USCRIPT_COMMON     ,
131
 
         USCRIPT_CYRILLIC   ,
132
 
         USCRIPT_DESERET    ,
133
 
         USCRIPT_DEVANAGARI ,
134
 
         USCRIPT_ETHIOPIC   ,
135
 
         USCRIPT_GEORGIAN   ,
136
 
         USCRIPT_GOTHIC     ,
137
 
         USCRIPT_GREEK      ,
138
 
         USCRIPT_GUJARATI   ,
139
 
         USCRIPT_GURMUKHI   ,
140
 
         USCRIPT_HAN        ,
141
 
         USCRIPT_HANGUL     ,
142
 
         USCRIPT_HEBREW     ,
143
 
         USCRIPT_HIRAGANA   ,
144
 
         USCRIPT_INHERITED  ,
145
 
         USCRIPT_KANNADA    ,
146
 
         USCRIPT_KATAKANA   ,
147
 
         USCRIPT_KHMER      ,
148
 
         USCRIPT_LATIN      ,
149
 
         USCRIPT_MALAYALAM  ,
150
 
         USCRIPT_MONGOLIAN  ,
151
 
         USCRIPT_MYANMAR    ,
152
 
         USCRIPT_OGHAM      ,
153
 
         USCRIPT_OLD_ITALIC ,
154
 
         USCRIPT_ORIYA      ,
155
 
         USCRIPT_RUNIC      ,
156
 
         USCRIPT_SINHALA    ,
157
 
         USCRIPT_SYRIAC     ,
158
 
         USCRIPT_TAMIL      ,
159
 
         USCRIPT_TELUGU     ,
160
 
         USCRIPT_THAANA     ,
161
 
         USCRIPT_THAI       ,
162
 
         USCRIPT_TIBETAN    ,
163
 
         USCRIPT_UCAS       ,
164
 
         USCRIPT_YI
165
 
};
166
 
 
167
 
 
168
 
static const UScriptCode scriptAbbrCodes[] = {
169
 
        USCRIPT_ARABIC     ,
170
 
        USCRIPT_ARMENIAN   ,
171
 
        USCRIPT_BENGALI    ,
172
 
        USCRIPT_BOPOMOFO   ,
173
 
        USCRIPT_UCAS       ,
174
 
        USCRIPT_CHEROKEE   ,
175
 
        USCRIPT_CYRILLIC   ,
176
 
       /* USCRIPT_CYRILLIC   , */
177
 
        USCRIPT_DEVANAGARI ,
178
 
        USCRIPT_DESERET    ,
179
 
        USCRIPT_ETHIOPIC   ,
180
 
      /*  USCRIPT_GEORGIAN   , */
181
 
      /*  USCRIPT_GEORGIAN   , */
182
 
        USCRIPT_GEORGIAN   ,
183
 
        USCRIPT_GOTHIC     ,
184
 
        USCRIPT_GREEK      ,
185
 
        USCRIPT_GUJARATI   ,
186
 
        USCRIPT_GURMUKHI   ,
187
 
        USCRIPT_HANGUL     ,
188
 
        USCRIPT_HAN        ,
189
 
        USCRIPT_HEBREW     ,
190
 
        USCRIPT_HIRAGANA   ,
191
 
        USCRIPT_OLD_ITALIC ,
192
 
        USCRIPT_KATAKANA   ,
193
 
        USCRIPT_KHMER      ,
194
 
        USCRIPT_KANNADA    ,
195
 
        USCRIPT_LAO        ,
196
 
      /*  USCRIPT_LAO        , */
197
 
      /*  USCRIPT_LATIN      , */
198
 
      /*  USCRIPT_LATIN      , */
199
 
        USCRIPT_LATIN      ,
200
 
        USCRIPT_MALAYALAM  ,
201
 
        USCRIPT_MONGOLIAN  ,
202
 
        USCRIPT_MYANMAR    ,
203
 
        USCRIPT_OGHAM      ,
204
 
        USCRIPT_ORIYA      ,
205
 
        USCRIPT_COPTIC     ,
206
 
        USCRIPT_INHERITED  ,
207
 
        USCRIPT_RUNIC      ,
208
 
        USCRIPT_SINHALA    ,
209
 
        USCRIPT_SYRIAC     ,
210
 
      /*  USCRIPT_SYRIAC     , */
211
 
      /*  USCRIPT_SYRIAC     , */
212
 
      /*  USCRIPT_SYRIAC     , */
213
 
        USCRIPT_TAMIL      ,
214
 
        USCRIPT_TELUGU     ,
215
 
        USCRIPT_THAANA     ,
216
 
        USCRIPT_THAI       ,
217
 
        USCRIPT_TIBETAN    ,
218
 
        USCRIPT_YI         ,
219
 
        USCRIPT_COMMON
220
 
};
221
 
 
222
 
/* binary search the string array */
223
 
U_INLINE static int 
224
 
findStringIndex(const char* const *sortedArr, const char* target, int size){
225
 
    int left, middle, right,rc;
226
 
    left =0;
227
 
    right= size-1;
228
 
    
229
 
    while(left <= right){
230
 
        middle = (left+right)/2;
231
 
        rc=uprv_stricmp(sortedArr[middle],target);
232
 
        if(rc<0){
233
 
            left = middle+1;
234
 
        }else if(rc >0){
235
 
            right = middle -1;
236
 
        }else{
237
 
            return middle;
238
 
        }
239
 
    }
240
 
    return -1;
241
 
}
242
 
 
243
 
/*
 * Disabled binary-search variant of findCodeIndex(); it requires the code
 * array to be sorted.  The linear-search version below is the one in use.
 *
static int 
findCodeIndex(const UScriptCode sorted[], const UScriptCode target, int size){
    int left, middle, right;
    left =0;
    right= size-1;
    while(left <= right){
        middle = (left+right)/2;
        if(sorted[middle] < target){
            left=middle+1;
        }else if(sorted[middle]>target){
            right=middle-1;
        }else{
            return middle;
        }
    }
    return -1;
}
*/
262
 
/* linearly search the array and return the index */
263
 
U_INLINE static int
264
 
findCodeIndex(const UScriptCode unsorted[], const UScriptCode target, int size){
265
 
    int i=0;
266
 
    while(i<size){
267
 
        if(target == unsorted[i]){
268
 
            return i;
269
 
        }
270
 
        i++;
271
 
    }
272
 
    return -1;
273
 
}
274
 
 
275
 
U_CAPI int32_t  U_EXPORT2
276
 
uscript_getCode(const char* nameOrAbbrOrLocale,
277
 
                UScriptCode* fillIn,
278
 
                int32_t capacity,
279
 
                UErrorCode* err){
280
 
 
281
 
    UScriptCode code = USCRIPT_INVALID_CODE;
282
 
    int strIndex=0;
283
 
    int32_t numFilled=0;
284
 
    int32_t len=0;
285
 
    /* check arguments */
286
 
    if(err==NULL ||U_FAILURE(*err)){
287
 
        return numFilled;
288
 
    }
289
 
    if(nameOrAbbrOrLocale==NULL || fillIn == NULL || capacity<0){
290
 
        *err = U_ILLEGAL_ARGUMENT_ERROR;
291
 
        return numFilled;
292
 
    }
293
 
    /* try the Names array first */
294
 
    strIndex = findStringIndex(scriptNames, nameOrAbbrOrLocale, sizeof(scriptNames)/sizeof(*scriptNames));
295
 
    
296
 
    if(strIndex>=0){ 
297
 
        code = (UScriptCode) scriptNameCodes[strIndex];
298
 
        len = 1;
299
 
    }
300
 
    /* we did not find in names array so try abbr array*/
301
 
    if(code ==USCRIPT_INVALID_CODE){
302
 
        strIndex = findStringIndex(scriptAbbr, nameOrAbbrOrLocale, sizeof(scriptAbbr)/sizeof(*scriptAbbr));
303
 
        if(strIndex>=0){ 
304
 
            code = (UScriptCode) scriptAbbrCodes[strIndex];
305
 
            len = 1;
306
 
        }
307
 
    }
308
 
 
309
 
    /* we still haven't found it try locale */
310
 
    if(code==USCRIPT_INVALID_CODE){
311
 
        UResourceBundle* resB = ures_open(u_getDataDirectory(),nameOrAbbrOrLocale,err);
312
 
        if(U_SUCCESS(*err)&& *err != U_USING_DEFAULT_ERROR){
313
 
            UResourceBundle* resD = ures_getByKey(resB,kLocaleScript,NULL,err);
314
 
            int index =0;
315
 
            if(U_SUCCESS(*err) ){
316
 
                len =0;
317
 
                while(ures_hasNext(resD)){
318
 
                    const UChar* name = ures_getNextString(resD,&len,NULL,err);
319
 
                    if(U_SUCCESS(*err)){
320
 
                        char cName[50] = {'\0'};
321
 
                        u_UCharsToChars(name,cName,len);
322
 
                        index = findStringIndex(scriptAbbr, cName, sizeof(scriptAbbr)/sizeof(*scriptAbbr));
323
 
                        code = (UScriptCode) scriptAbbrCodes[index];
324
 
                        /* got the script code now fill in the buffer */
325
 
                        if(numFilled<=capacity){ 
326
 
                            *(fillIn)++=code;
327
 
                            numFilled++;
328
 
                        }else{
329
 
                            ures_close(resD);
330
 
                            ures_close(resB);
331
 
                            *err=U_BUFFER_OVERFLOW_ERROR;
332
 
                            return len;
333
 
                        }
334
 
                    }
335
 
                }
336
 
            }
337
 
            ures_close(resD);
338
 
        
339
 
        }
340
 
        ures_close(resB);
341
 
    }else{
342
 
        /* we found it */
343
 
        if(numFilled<=capacity){ 
344
 
            *(fillIn)++=code;
345
 
            numFilled++;
346
 
        }else{
347
 
            *err=U_BUFFER_OVERFLOW_ERROR;
348
 
            return len;
349
 
        }
350
 
    }
351
 
    return numFilled;
352
 
}
353
 
 
354
 
U_CAPI const char*  U_EXPORT2
355
 
uscript_getName(UScriptCode scriptCode){
356
 
    int index = -1;
357
 
    if(scriptCode > USCRIPT_CODE_LIMIT){
358
 
        return "";
359
 
    }
360
 
    index = findCodeIndex(scriptNameCodes,scriptCode,sizeof(scriptNameCodes)/sizeof(*scriptNameCodes));
361
 
    if(index >=0){
362
 
        return scriptNames[index];
363
 
    }else{
364
 
       return "";
365
 
    }
366
 
 
367
 
}
368
 
U_CAPI const char*  U_EXPORT2
369
 
uscript_getShortName(UScriptCode scriptCode){
370
 
    int index = -1;
371
 
    if(scriptCode > USCRIPT_CODE_LIMIT){
372
 
        return "";
373
 
    }
374
 
    index = findCodeIndex(scriptAbbrCodes,scriptCode,sizeof(scriptAbbrCodes)/sizeof(*scriptAbbrCodes));
375
 
    if(index >=0){
376
 
        return scriptAbbr[index];
377
 
    }else{
378
 
       return "";
379
 
    }
380
 
}