~ubuntu-branches/ubuntu/karmic/firebird2.1/karmic

« back to all changes in this revision

Viewing changes to src/jrd/IntlUtil.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Damyan Ivanov
  • Date: 2008-05-26 23:59:25 UTC
  • Revision ID: james.westby@ubuntu.com-20080526235925-2pnqj6nxpppoeaer
Tags: upstream-2.1.0.17798-0.ds2
Import upstream version 2.1.0.17798-0.ds2

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 *      PROGRAM:        JRD International support
 
3
 *      MODULE:         IntlUtil.cpp
 
4
 *      DESCRIPTION:    INTL Utility functions
 
5
 *
 
6
 *  The contents of this file are subject to the Initial
 
7
 *  Developer's Public License Version 1.0 (the "License");
 
8
 *  you may not use this file except in compliance with the
 
9
 *  License. You may obtain a copy of the License at
 
10
 *  http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
 
11
 *
 
12
 *  Software distributed under the License is distributed AS IS,
 
13
 *  WITHOUT WARRANTY OF ANY KIND, either express or implied.
 
14
 *  See the License for the specific language governing rights
 
15
 *  and limitations under the License.
 
16
 *
 
17
 *  The Original Code was created by Adriano dos Santos Fernandes
 
18
 *  for the Firebird Open Source RDBMS project.
 
19
 *
 
20
 *  Copyright (c) 2006 Adriano dos Santos Fernandes <adrianosf@uol.com.br>
 
21
 *  and all contributors signed below.
 
22
 *
 
23
 *  All Rights Reserved.
 
24
 *  Contributor(s): ______________________________________.
 
25
 */
 
26
 
 
27
#include "firebird.h"
 
28
#include "../jrd/IntlUtil.h"
 
29
#include "../jrd/unicode_util.h"
 
30
#include "../jrd/intl_classes.h"
 
31
#include "../intl/country_codes.h"
 
32
#include "../common/classes/auto.h"
 
33
#include "../common/classes/Aligner.h"
 
34
 
 
35
 
 
36
using Jrd::UnicodeUtil;
 
37
 
 
38
 
 
39
namespace
 
40
{
 
41
        struct TextTypeImpl
 
42
        {
 
43
                TextTypeImpl(charset* a_cs, UnicodeUtil::Utf16Collation* a_collation)
 
44
                        : cs(a_cs),
 
45
                          collation(a_collation)
 
46
                {
 
47
                }
 
48
 
 
49
                ~TextTypeImpl()
 
50
                {
 
51
                        if (cs->charset_fn_destroy)
 
52
                                cs->charset_fn_destroy(cs);
 
53
 
 
54
                        delete cs;
 
55
                        delete collation;
 
56
                }
 
57
 
 
58
                charset* cs;
 
59
                UnicodeUtil::Utf16Collation* collation;
 
60
        };
 
61
}
 
62
 
 
63
 
 
64
namespace Firebird {
 
65
 
 
66
 
 
67
static void unicodeDestroy(texttype* tt);
 
68
static USHORT unicodeKeyLength(texttype* tt, USHORT len);
 
69
static USHORT unicodeStrToKey(texttype* tt, USHORT srcLen, const UCHAR* src,
 
70
        USHORT dstLen, UCHAR* dst, USHORT keyType);
 
71
static SSHORT unicodeCompare(texttype* tt, ULONG len1, const UCHAR* str1,
 
72
        ULONG len2, const UCHAR* str2, INTL_BOOL* errorFlag);
 
73
static ULONG unicodeCanonical(texttype* tt, ULONG srcLen, const UCHAR* src,
 
74
        ULONG dstLen, UCHAR* dst);
 
75
 
 
76
 
 
77
string IntlUtil::generateSpecificAttributes(
 
78
        Jrd::CharSet* cs, SpecificAttributesMap& map)
 
79
{
 
80
        bool found = map.getFirst();
 
81
        string s;
 
82
 
 
83
        while (found)
 
84
        {
 
85
                UCHAR c[sizeof(ULONG)];
 
86
                ULONG size;
 
87
 
 
88
                SpecificAttribute* attribute = map.current();
 
89
 
 
90
                s += escapeAttribute(cs, attribute->first);
 
91
 
 
92
                const USHORT equalChar = '=';
 
93
 
 
94
                size = cs->getConvFromUnicode().convert(
 
95
                        sizeof(equalChar), (const UCHAR*)&equalChar,
 
96
                        sizeof(c), c);
 
97
 
 
98
                s += string((const char*)&c, size);
 
99
 
 
100
                s += escapeAttribute(cs, attribute->second);
 
101
 
 
102
                found = map.getNext();
 
103
 
 
104
                if (found)
 
105
                {
 
106
                        const USHORT semiColonChar = ';';
 
107
                        size = cs->getConvFromUnicode().convert(
 
108
                                sizeof(semiColonChar), (const UCHAR*)&semiColonChar, sizeof(c), c);
 
109
 
 
110
                        s += string((const char*)&c, size);
 
111
                }
 
112
        }
 
113
 
 
114
        return s;
 
115
}
 
116
 
 
117
 
 
118
bool IntlUtil::parseSpecificAttributes(
 
119
        Jrd::CharSet* cs, ULONG len, const UCHAR* s, SpecificAttributesMap* map)
 
120
{
 
121
        // Note that the map isn't cleared.
 
122
        // Old attributes will be combined with the new ones.
 
123
 
 
124
        const UCHAR* p = s;
 
125
        const UCHAR* const end = s + len;
 
126
        ULONG size = 0;
 
127
 
 
128
        readAttributeChar(cs, &p, end, &size, true);
 
129
 
 
130
        while (p < end)
 
131
        {
 
132
                while (p < end && size == cs->getSpaceLength() &&
 
133
                           memcmp(p, cs->getSpace(), cs->getSpaceLength()) == 0)
 
134
                {
 
135
                        if (!readAttributeChar(cs, &p, end, &size, true))
 
136
                                return true;
 
137
                }
 
138
 
 
139
                const UCHAR* start = p;
 
140
 
 
141
                UCHAR uc[sizeof(ULONG)];
 
142
                ULONG uSize;
 
143
 
 
144
                while (p < end)
 
145
                {
 
146
                        uSize = cs->getConvToUnicode().convert(size, p, sizeof(uc), uc);
 
147
                                
 
148
                        if (uSize == 2 &&
 
149
                                         ((*(USHORT*)uc >= 'A' && *(USHORT*)uc <= 'Z') ||
 
150
                                          (*(USHORT*)uc >= 'a' && *(USHORT*)uc <= 'z') ||
 
151
                                          *(USHORT*)uc == '-' || *(USHORT*)uc == '_'))
 
152
                        {
 
153
                                if (!readAttributeChar(cs, &p, end, &size, true))
 
154
                                        return false;
 
155
                        }
 
156
                        else
 
157
                                break;
 
158
                }
 
159
 
 
160
                if (p - start == 0)
 
161
                        return false;
 
162
 
 
163
                string name = string((const char*)start, p - start);
 
164
                name = unescapeAttribute(cs, name);
 
165
 
 
166
                while (p < end && size == cs->getSpaceLength() &&
 
167
                           memcmp(p, cs->getSpace(), cs->getSpaceLength()) == 0)
 
168
                {
 
169
                        if (!readAttributeChar(cs, &p, end, &size, true))
 
170
                                return false;
 
171
                }
 
172
 
 
173
                uSize = cs->getConvToUnicode().convert(size, p, sizeof(uc), uc);
 
174
 
 
175
                if (uSize != 2 || *(USHORT*)uc != '=')
 
176
                        return false;
 
177
 
 
178
                string value;
 
179
 
 
180
                if (readAttributeChar(cs, &p, end, &size, true))
 
181
                {
 
182
                        while (p < end && size == cs->getSpaceLength() &&
 
183
                                   memcmp(p, cs->getSpace(), cs->getSpaceLength()) == 0)
 
184
                        {
 
185
                                if (!readAttributeChar(cs, &p, end, &size, true))
 
186
                                        return false;
 
187
                        }
 
188
 
 
189
                        const UCHAR* endNoSpace = start = p;
 
190
 
 
191
                        while (p < end)
 
192
                        {
 
193
                                uSize = cs->getConvToUnicode().convert(size, p, sizeof(uc), uc);
 
194
 
 
195
                                if (uSize != 2 || *(USHORT*)uc != ';')
 
196
                                {
 
197
                                        if (!(size == cs->getSpaceLength() &&
 
198
                                                  memcmp(p, cs->getSpace(), cs->getSpaceLength()) == 0))
 
199
                                        {
 
200
                                                endNoSpace = p + size;
 
201
                                        }
 
202
 
 
203
                                        if (!readAttributeChar(cs, &p, end, &size, true))
 
204
                                                break;
 
205
                                }
 
206
                                else
 
207
                                        break;
 
208
                        }
 
209
 
 
210
                        value = unescapeAttribute(cs,
 
211
                                string((const char*)start, endNoSpace - start));
 
212
 
 
213
                        if (p < end)
 
214
                                readAttributeChar(cs, &p, end, &size, true);    // skip the semicolon
 
215
                }
 
216
 
 
217
                if (value.isEmpty())
 
218
                        map->remove(name);
 
219
                else
 
220
                        map->put(name, value);
 
221
        }
 
222
 
 
223
        return true;
 
224
}
 
225
 
 
226
 
 
227
string IntlUtil::convertAsciiToUtf16(const string& ascii)
 
228
{
 
229
        string s;
 
230
        const char* end = ascii.c_str() + ascii.length();
 
231
 
 
232
        for (const char* p = ascii.c_str(); p < end; ++p)
 
233
        {
 
234
                USHORT c = *(UCHAR*) p;
 
235
                s.append((char*) &c, sizeof(c));
 
236
        }
 
237
 
 
238
        return s;
 
239
}
 
240
 
 
241
 
 
242
string IntlUtil::convertUtf16ToAscii(const string& utf16, bool* error)
 
243
{
 
244
        fb_assert(utf16.length() % sizeof(USHORT) == 0);
 
245
 
 
246
        string s;
 
247
        const USHORT* end = (const USHORT*) (utf16.c_str() + utf16.length());
 
248
 
 
249
        for (const USHORT* p = (const USHORT*) utf16.c_str(); p < end; ++p)
 
250
        {
 
251
                if (*p <= 0xFF)
 
252
                        s.append((UCHAR) *p);
 
253
                else
 
254
                {
 
255
                        *error = true;
 
256
                        return "";
 
257
                }
 
258
        }
 
259
 
 
260
        *error = false;
 
261
 
 
262
        return s;
 
263
}
 
264
 
 
265
 
 
266
ULONG IntlUtil::cvtAsciiToUtf16(csconvert* obj, ULONG nSrc, const UCHAR* pSrc,
 
267
        ULONG nDest, UCHAR* ppDest, USHORT* err_code, ULONG* err_position)
 
268
{
 
269
/**************************************
 
270
 *
 
271
 *      c v t A s c i i T o U t f 1 6
 
272
 *
 
273
 **************************************
 
274
 *
 
275
 * Functional description
 
276
 *      Convert CHARACTER SET ASCII to UTF-16.
 
277
 *      Byte values below 128 treated as ASCII.
 
278
 *      Byte values >= 128 create BAD_INPUT
 
279
 *
 
280
 *************************************/
 
281
        fb_assert(obj != NULL);
 
282
        fb_assert((pSrc != NULL) || (ppDest == NULL));
 
283
        fb_assert(err_code != NULL);
 
284
 
 
285
        *err_code = 0;
 
286
        if (ppDest == NULL)                     /* length estimate needed? */
 
287
                return (2 * nSrc);
 
288
 
 
289
        Firebird::OutAligner<USHORT> d(ppDest, nDest);
 
290
        USHORT* pDest = d;
 
291
 
 
292
        const USHORT* const pStart = pDest;
 
293
        const UCHAR* const pStart_src = pSrc;
 
294
        while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) {
 
295
                if (*pSrc > 127) {
 
296
                        *err_code = CS_BAD_INPUT;
 
297
                        break;
 
298
                }
 
299
                *pDest++ = *pSrc++;
 
300
                nDest -= sizeof(*pDest);
 
301
                nSrc -= sizeof(*pSrc);
 
302
        }
 
303
        if (!*err_code && nSrc) {
 
304
                *err_code = CS_TRUNCATION_ERROR;
 
305
        }
 
306
        *err_position = (pSrc - pStart_src) * sizeof(*pSrc);
 
307
 
 
308
        return ((pDest - pStart) * sizeof(*pDest));
 
309
}
 
310
 
 
311
 
 
312
ULONG IntlUtil::cvtUtf16ToAscii(csconvert* obj, ULONG nSrc, const UCHAR* ppSrc,
 
313
        ULONG nDest, UCHAR* pDest, USHORT* err_code, ULONG* err_position)
 
314
{
 
315
/**************************************
 
316
 *
 
317
 *      c v t U t f 1 6 T o A s c i i
 
318
 *
 
319
 **************************************
 
320
 *
 
321
 * Functional description
 
322
 *      Convert UTF16 to CHARACTER SET ASCII.
 
323
 *      Byte values below 128 treated as ASCII.
 
324
 *      Byte values >= 128 create CONVERT_ERROR
 
325
 *
 
326
 *************************************/
 
327
        fb_assert(obj != NULL);
 
328
        fb_assert((ppSrc != NULL) || (pDest == NULL));
 
329
        fb_assert(err_code != NULL);
 
330
 
 
331
        *err_code = 0;
 
332
        if (pDest == NULL)                      /* length estimate needed? */
 
333
                return (nSrc / 2);
 
334
 
 
335
        Firebird::Aligner<USHORT> s(ppSrc, nSrc);
 
336
        const USHORT* pSrc = s;
 
337
 
 
338
        const UCHAR* const pStart = pDest;
 
339
        const USHORT* const pStart_src = pSrc;
 
340
        while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) {
 
341
                if (*pSrc > 127) {
 
342
                        *err_code = CS_CONVERT_ERROR;
 
343
                        break;
 
344
                }
 
345
                *pDest++ = *pSrc++;
 
346
                nDest -= sizeof(*pDest);
 
347
                nSrc -= sizeof(*pSrc);
 
348
        }
 
349
        if (!*err_code && nSrc) {
 
350
                *err_code = CS_TRUNCATION_ERROR;
 
351
        }
 
352
        *err_position = (pSrc - pStart_src) * sizeof(*pSrc);
 
353
 
 
354
        return ((pDest - pStart) * sizeof(*pDest));
 
355
}
 
356
 
 
357
 
 
358
void IntlUtil::initAsciiCharset(charset* cs)
 
359
{
 
360
        initNarrowCharset(cs, "ASCII");
 
361
        initConvert(&cs->charset_to_unicode, cvtAsciiToUtf16);
 
362
        initConvert(&cs->charset_from_unicode, cvtUtf16ToAscii);
 
363
}
 
364
 
 
365
 
 
366
void IntlUtil::initConvert(csconvert* cvt, pfn_INTL_convert func)
 
367
{
 
368
        memset(cvt, 0, sizeof(*cvt));
 
369
        cvt->csconvert_version = CSCONVERT_VERSION_1;
 
370
        cvt->csconvert_name = (const ASCII*) "DIRECT";
 
371
        cvt->csconvert_fn_convert = func;
 
372
}
 
373
 
 
374
 
 
375
void IntlUtil::initNarrowCharset(charset* cs, const ASCII* name)
 
376
{
 
377
        memset(cs, 0, sizeof(*cs));
 
378
        cs->charset_version = CHARSET_VERSION_1;
 
379
        cs->charset_name = name;
 
380
        cs->charset_flags |= CHARSET_ASCII_BASED;
 
381
        cs->charset_min_bytes_per_char = 1;
 
382
        cs->charset_max_bytes_per_char = 1;
 
383
        cs->charset_space_length = 1;
 
384
        cs->charset_space_character = (const BYTE*) " ";
 
385
        cs->charset_fn_well_formed = NULL;
 
386
}
 
387
 
 
388
 
 
389
bool IntlUtil::initUnicodeCollation(texttype* tt, charset* cs, const ASCII* name,
 
390
        USHORT attributes, const UCharBuffer& specificAttributes, const string& configInfo)
 
391
{
 
392
        // name comes from stack. Copy it.
 
393
        ASCII* nameCopy = new ASCII[strlen(name) + 1];
 
394
        strcpy(nameCopy, name);
 
395
        tt->texttype_name = nameCopy;
 
396
 
 
397
        tt->texttype_version = TEXTTYPE_VERSION_1;
 
398
        tt->texttype_country = CC_INTL;
 
399
        tt->texttype_fn_destroy = unicodeDestroy;
 
400
        tt->texttype_fn_compare = unicodeCompare;
 
401
        tt->texttype_fn_key_length = unicodeKeyLength;
 
402
        tt->texttype_fn_string_to_key = unicodeStrToKey;
 
403
 
 
404
        IntlUtil::SpecificAttributesMap map;
 
405
 
 
406
        Jrd::CharSet* charSet = NULL;
 
407
 
 
408
        try
 
409
        {
 
410
                charSet = Jrd::CharSet::createInstance(*getDefaultMemoryPool(), 0, cs);
 
411
                IntlUtil::parseSpecificAttributes(charSet, specificAttributes.getCount(),
 
412
                        specificAttributes.begin(), &map);
 
413
                delete charSet;
 
414
        }
 
415
        catch (...)
 
416
        {
 
417
                delete charSet;
 
418
                return false;
 
419
        }
 
420
 
 
421
        IntlUtil::SpecificAttributesMap map16;
 
422
 
 
423
        bool found = map.getFirst();
 
424
 
 
425
        while (found)
 
426
        {
 
427
                UCharBuffer s1, s2;
 
428
                USHORT errCode;
 
429
                ULONG errPosition;
 
430
 
 
431
                s1.resize(cs->charset_to_unicode.csconvert_fn_convert(
 
432
                        &cs->charset_to_unicode, map.current()->first.length(), NULL, 0, NULL, &errCode, &errPosition));
 
433
                s1.resize(cs->charset_to_unicode.csconvert_fn_convert(
 
434
                        &cs->charset_to_unicode, map.current()->first.length(), (UCHAR*) map.current()->first.c_str(),
 
435
                        s1.getCapacity(), s1.begin(), &errCode, &errPosition));
 
436
 
 
437
                s2.resize(cs->charset_to_unicode.csconvert_fn_convert(
 
438
                        &cs->charset_to_unicode, map.current()->second.length(), NULL, 0, NULL, &errCode, &errPosition));
 
439
                s2.resize(cs->charset_to_unicode.csconvert_fn_convert(
 
440
                        &cs->charset_to_unicode, map.current()->second.length(), (UCHAR*) map.current()->second.c_str(),
 
441
                        s2.getCapacity(), s2.begin(), &errCode, &errPosition));
 
442
 
 
443
                map16.put(string((char*) s1.begin(), s1.getCount()), string((char*) s2.begin(), s2.getCount()));
 
444
 
 
445
                found = map.getNext();
 
446
        }
 
447
 
 
448
        UnicodeUtil::Utf16Collation* collation =
 
449
                UnicodeUtil::Utf16Collation::create(tt, attributes, map16, configInfo);
 
450
 
 
451
        if (!collation)
 
452
                return false;
 
453
 
 
454
        tt->texttype_impl = new TextTypeImpl(cs, collation);
 
455
 
 
456
        if (tt->texttype_canonical_width != 0)
 
457
                tt->texttype_fn_canonical = unicodeCanonical;
 
458
 
 
459
        return true;
 
460
}
 
461
 
 
462
 
 
463
ULONG IntlUtil::toLower(Jrd::CharSet* cs, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst,
 
464
        const ULONG* exceptions)
 
465
{
 
466
        const ULONG utf16_length = cs->getConvToUnicode().convertLength(srcLen);
 
467
        Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16_str;
 
468
        UCHAR* utf16_ptr;
 
469
 
 
470
        if (dstLen >= utf16_length)     // if dst buffer is sufficient large, use it as intermediate
 
471
                utf16_ptr = dst;
 
472
        else
 
473
                utf16_ptr = utf16_str.getBuffer(utf16_length);
 
474
 
 
475
        // convert to UTF-16
 
476
        srcLen = cs->getConvToUnicode().convert(srcLen, src, utf16_length, utf16_ptr);
 
477
 
 
478
        // convert to lowercase
 
479
        Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> lower_str;
 
480
        srcLen = UnicodeUtil::utf16LowerCase(srcLen, Firebird::Aligner<USHORT>(utf16_ptr, srcLen),
 
481
                utf16_length, Firebird::OutAligner<USHORT>(lower_str.getBuffer(utf16_length), utf16_length), 
 
482
                exceptions);
 
483
 
 
484
        // convert to original character set
 
485
        return cs->getConvFromUnicode().convert(srcLen, lower_str.begin(), dstLen, dst);
 
486
}
 
487
 
 
488
 
 
489
ULONG IntlUtil::toUpper(Jrd::CharSet* cs, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst,
 
490
        const ULONG* exceptions)
 
491
{
 
492
        const ULONG utf16_length = cs->getConvToUnicode().convertLength(srcLen);
 
493
        Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16_str;
 
494
        UCHAR* utf16_ptr;
 
495
 
 
496
        if (dstLen >= utf16_length)     // if dst buffer is sufficient large, use it as intermediate
 
497
                utf16_ptr = dst;
 
498
        else
 
499
                utf16_ptr = utf16_str.getBuffer(utf16_length);
 
500
 
 
501
        // convert to UTF-16
 
502
        srcLen = cs->getConvToUnicode().convert(srcLen, src, utf16_length, utf16_ptr);
 
503
 
 
504
        // convert to uppercase
 
505
        Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> upper_str;
 
506
        srcLen = UnicodeUtil::utf16UpperCase(srcLen, Firebird::Aligner<USHORT>(utf16_ptr, srcLen),
 
507
                utf16_length, Firebird::OutAligner<USHORT>(upper_str.getBuffer(utf16_length), utf16_length), 
 
508
                exceptions);
 
509
 
 
510
        // convert to original character set
 
511
        return cs->getConvFromUnicode().convert(srcLen, upper_str.begin(), dstLen, dst);
 
512
}
 
513
 
 
514
 
 
515
bool IntlUtil::readOneChar(Jrd::CharSet* cs, const UCHAR** s, const UCHAR* end, ULONG* size)
 
516
{
 
517
        (*s) += *size;
 
518
 
 
519
        if (*s >= end)
 
520
        {
 
521
                (*s) = end;
 
522
                *size = 0;
 
523
                return false;
 
524
        }
 
525
 
 
526
        UCHAR c[sizeof(ULONG)];
 
527
        *size = cs->substring(end - *s, *s, sizeof(c), c, 0, 1);
 
528
 
 
529
        return true;
 
530
}
 
531
 
 
532
 
 
533
// Transform ICU-VERSION attribute (given by the user) in COLL-VERSION (to be stored).
 
534
bool IntlUtil::setupIcuAttributes(charset* cs, const string& specificAttributes,
 
535
        const string& configInfo, string& newSpecificAttributes)
 
536
{
 
537
        AutoPtr<Jrd::CharSet> charSet(Jrd::CharSet::createInstance(*getDefaultMemoryPool(), 0, cs));
 
538
 
 
539
        IntlUtil::SpecificAttributesMap map;
 
540
        if (!IntlUtil::parseSpecificAttributes(charSet, specificAttributes.length(),
 
541
                        (const UCHAR*) specificAttributes.begin(), &map))
 
542
        {
 
543
                return false;
 
544
        }
 
545
 
 
546
        string icuVersion;
 
547
        map.get("ICU-VERSION", icuVersion);
 
548
 
 
549
        string collVersion;
 
550
        if (!UnicodeUtil::getCollVersion(icuVersion, configInfo, collVersion))
 
551
                return false;
 
552
 
 
553
        map.remove("ICU-VERSION");
 
554
        map.remove("COLL-VERSION");
 
555
 
 
556
        if (collVersion.hasData())
 
557
                map.put("COLL-VERSION", collVersion);
 
558
 
 
559
        newSpecificAttributes = IntlUtil::generateSpecificAttributes(charSet, map);
 
560
        return true;
 
561
}
 
562
 
 
563
 
 
564
string IntlUtil::escapeAttribute(Jrd::CharSet* cs, const string& s)
 
565
{
 
566
        string ret;
 
567
        const UCHAR* p = (const UCHAR*)s.begin();
 
568
        const UCHAR* end = (const UCHAR*)s.end();
 
569
        ULONG size = 0;
 
570
 
 
571
        while (readOneChar(cs, &p, end, &size))
 
572
        {
 
573
                UCHAR uc[sizeof(ULONG)];
 
574
 
 
575
                ULONG uSize = cs->getConvToUnicode().convert(size, p, sizeof(uc), uc);
 
576
 
 
577
                if (uSize == 2)
 
578
                {
 
579
                        if (*(USHORT*)uc == '\\' || *(USHORT*)uc == '=' || *(USHORT*)uc == ';')
 
580
                        {
 
581
                                *(USHORT*)uc = '\\';
 
582
                                UCHAR bytes[sizeof(ULONG)];
 
583
                                
 
584
                                ULONG bytesSize = cs->getConvFromUnicode().convert(
 
585
                                        sizeof(USHORT), uc, sizeof(bytes), bytes);
 
586
 
 
587
                                ret.append(string((const char*)bytes, bytesSize));
 
588
                        }
 
589
                }
 
590
 
 
591
                ret.append(string((const char*)p, size));
 
592
        }
 
593
 
 
594
        return ret;
 
595
}
 
596
 
 
597
 
 
598
string IntlUtil::unescapeAttribute(Jrd::CharSet* cs, const string& s)
 
599
{
 
600
        string ret;
 
601
        const UCHAR* p = (const UCHAR*)s.begin();
 
602
        const UCHAR* end = (const UCHAR*)s.end();
 
603
        ULONG size = 0;
 
604
 
 
605
        while (readAttributeChar(cs, &p, end, &size, false))
 
606
                ret.append(string((const char*)p, size));
 
607
 
 
608
        return ret;
 
609
}
 
610
 
 
611
 
 
612
bool IntlUtil::isAttributeEscape(Jrd::CharSet* cs, const UCHAR* s, ULONG size)
 
613
{
 
614
        UCHAR uc[sizeof(ULONG)];
 
615
        ULONG uSize = cs->getConvToUnicode().convert(size, s, sizeof(uc), uc);
 
616
 
 
617
        if (uSize == 2 && *(USHORT*)uc == '\\')
 
618
                return true;
 
619
 
 
620
        return false;
 
621
}
 
622
 
 
623
 
 
624
bool IntlUtil::readAttributeChar(Jrd::CharSet* cs, const UCHAR** s, const UCHAR* end, ULONG* size, bool returnEscape)
 
625
{
 
626
        if (readOneChar(cs, s, end, size))
 
627
        {
 
628
                if (isAttributeEscape(cs, *s, *size))
 
629
                {
 
630
                        const UCHAR* p = *s;
 
631
                        ULONG firstSize = *size;
 
632
 
 
633
                        if (readOneChar(cs, s, end, size))
 
634
                        {
 
635
                                if (returnEscape)
 
636
                                {
 
637
                                        *s = p;
 
638
                                        *size += firstSize;
 
639
                                }
 
640
                        }
 
641
                        else
 
642
                                return false;
 
643
                }
 
644
 
 
645
                return true;
 
646
        }
 
647
 
 
648
        return false;
 
649
}
 
650
 
 
651
 
 
652
static void unicodeDestroy(texttype* tt)
 
653
{
 
654
        delete [] const_cast<ASCII*>(tt->texttype_name);
 
655
        delete tt->texttype_impl;
 
656
}
 
657
 
 
658
 
 
659
static USHORT unicodeKeyLength(texttype* tt, USHORT len)
 
660
{
 
661
        return tt->texttype_impl->collation->keyLength(
 
662
                len / tt->texttype_impl->cs->charset_max_bytes_per_char * 4);
 
663
}
 
664
 
 
665
 
 
666
static USHORT unicodeStrToKey(texttype* tt, USHORT srcLen, const UCHAR* src,
 
667
        USHORT dstLen, UCHAR* dst, USHORT keyType)
 
668
{
 
669
        try
 
670
        {
 
671
                charset* cs = tt->texttype_impl->cs;
 
672
 
 
673
                HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str;
 
674
                USHORT errorCode;
 
675
                ULONG offendingPos;
 
676
 
 
677
                utf16Str.getBuffer(
 
678
                        cs->charset_to_unicode.csconvert_fn_convert(
 
679
                                &cs->charset_to_unicode,
 
680
                                srcLen,
 
681
                                src,
 
682
                                0,
 
683
                                NULL,
 
684
                                &errorCode,
 
685
                                &offendingPos));
 
686
 
 
687
                ULONG utf16Len = cs->charset_to_unicode.csconvert_fn_convert(
 
688
                        &cs->charset_to_unicode,
 
689
                        srcLen,
 
690
                        src,
 
691
                        utf16Str.getCapacity(),
 
692
                        utf16Str.begin(),
 
693
                        &errorCode,
 
694
                        &offendingPos);
 
695
 
 
696
                return tt->texttype_impl->collation->stringToKey(
 
697
                        utf16Len, (USHORT*)utf16Str.begin(), dstLen, dst, keyType);
 
698
        }
 
699
        catch (BadAlloc)
 
700
        {
 
701
                fb_assert(false);
 
702
                return INTL_BAD_KEY_LENGTH;
 
703
        }
 
704
}
 
705
 
 
706
 
 
707
static SSHORT unicodeCompare(texttype* tt, ULONG len1, const UCHAR* str1,
 
708
        ULONG len2, const UCHAR* str2, INTL_BOOL* errorFlag)
 
709
{
 
710
        try
 
711
        {
 
712
                *errorFlag = false;
 
713
 
 
714
                charset* cs = tt->texttype_impl->cs;
 
715
 
 
716
                HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str1;
 
717
                HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str2;
 
718
                USHORT errorCode;
 
719
                ULONG offendingPos;
 
720
 
 
721
                utf16Str1.getBuffer(
 
722
                        cs->charset_to_unicode.csconvert_fn_convert(
 
723
                                &cs->charset_to_unicode,
 
724
                                len1,
 
725
                                str1,
 
726
                                0,
 
727
                                NULL,
 
728
                                &errorCode,
 
729
                                &offendingPos));
 
730
 
 
731
                ULONG utf16Len1 = cs->charset_to_unicode.csconvert_fn_convert(
 
732
                        &cs->charset_to_unicode,
 
733
                        len1,
 
734
                        str1,
 
735
                        utf16Str1.getCapacity(),
 
736
                        utf16Str1.begin(),
 
737
                        &errorCode,
 
738
                        &offendingPos);
 
739
 
 
740
                utf16Str2.getBuffer(
 
741
                        cs->charset_to_unicode.csconvert_fn_convert(
 
742
                                &cs->charset_to_unicode,
 
743
                                len2,
 
744
                                str2,
 
745
                                0,
 
746
                                NULL,
 
747
                                &errorCode,
 
748
                                &offendingPos));
 
749
 
 
750
                ULONG utf16Len2 = cs->charset_to_unicode.csconvert_fn_convert(
 
751
                        &cs->charset_to_unicode,
 
752
                        len2,
 
753
                        str2,
 
754
                        utf16Str2.getCapacity(),
 
755
                        utf16Str2.begin(),
 
756
                        &errorCode,
 
757
                        &offendingPos);
 
758
 
 
759
                return tt->texttype_impl->collation->compare(
 
760
                        utf16Len1, (USHORT*)utf16Str1.begin(),
 
761
                        utf16Len2, (USHORT*)utf16Str2.begin(), errorFlag);
 
762
        }
 
763
        catch (BadAlloc)
 
764
        {
 
765
                fb_assert(false);
 
766
                return 0;
 
767
        }
 
768
}
 
769
 
 
770
 
 
771
static ULONG unicodeCanonical(texttype* tt, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst)
 
772
{
 
773
        try
 
774
        {
 
775
                charset* cs = tt->texttype_impl->cs;
 
776
 
 
777
                HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str;
 
778
                USHORT errorCode;
 
779
                ULONG offendingPos;
 
780
 
 
781
                utf16Str.getBuffer(
 
782
                        cs->charset_to_unicode.csconvert_fn_convert(
 
783
                                &cs->charset_to_unicode,
 
784
                                srcLen,
 
785
                                src,
 
786
                                0,
 
787
                                NULL,
 
788
                                &errorCode,
 
789
                                &offendingPos));
 
790
 
 
791
                ULONG utf16Len = cs->charset_to_unicode.csconvert_fn_convert(
 
792
                        &cs->charset_to_unicode,
 
793
                        srcLen,
 
794
                        src,
 
795
                        utf16Str.getCapacity(),
 
796
                        utf16Str.begin(),
 
797
                        &errorCode,
 
798
                        &offendingPos);
 
799
 
 
800
                return tt->texttype_impl->collation->canonical(
 
801
                        utf16Len, Firebird::Aligner<USHORT>(utf16Str.begin(), utf16Len),
 
802
                        dstLen, Firebird::OutAligner<ULONG>(dst, dstLen), NULL);
 
803
        }
 
804
        catch (BadAlloc)
 
805
        {
 
806
                fb_assert(false);
 
807
                return INTL_BAD_KEY_LENGTH;
 
808
        }
 
809
}
 
810
 
 
811
 
 
812
}       // namespace Firebird