1
/************* history ************
3
* COMPONENT: JRD MODULE: INTL.CPP
4
* generated by Marion V2.5 2/6/90
5
* from dev db on 4-JAN-1995
6
*****************************************************************
8
* PR 2002-06-02 Added ugly c hack in
9
* intl_back_compat_alloc_func_lookup.
10
* When someone has time we need to change the references to
11
* return (void*) function to something more C++ like
13
* 42 4711 3 11 17 tamlin 2001
14
* Added silly numbers before my name, and converted it to C++.
16
* 18850 daves 4-JAN-1995
17
* Fix gds__alloc usage
19
* 18837 deej 31-DEC-1994
20
* fixing up HARBOR_MERGE
22
* 18821 deej 27-DEC-1994
25
* 18789 jdavid 19-DEC-1994
28
* 17508 jdavid 15-JUL-1994
31
* 17500 daves 13-JUL-1994
32
* Bug 6645: Different calculation of partial keys
34
* 17202 katz 24-MAY-1994
35
* PC_PLATFORM requires the .dll extension
37
* 17191 katz 23-MAY-1994
38
* OS/2 requires the .dll extension
40
* 17180 katz 23-MAY-1994
41
* Define location of DLL on OS/2
43
* 17149 katz 20-MAY-1994
44
* In JRD, isc_arg_number arguments are SLONG's not int's
46
* 16633 daves 19-APR-1994
47
* Bug 6202: International licensing uses INTERNATIONAL product code
49
* 16555 katz 17-APR-1994
50
* The last argument of calls to ERR_post should be 0
52
* 16521 katz 14-APR-1994
53
* Borland C needs a decorated symbol to lookup
55
* 16403 daves 8-APR-1994
56
* Bug 6441: Emit an error whenever transliteration from ttype_binary attempted
58
* 16141 katz 28-MAR-1994
59
* Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE
61
* The contents of this file are subject to the Interbase Public
62
* License Version 1.0 (the "License"); you may not use this file
63
* except in compliance with the License. You may obtain a copy
64
* of the License at http://www.Inprise.com/IPL.html
66
* Software distributed under the License is distributed on an
67
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
68
* or implied. See the License for the specific language governing
69
* rights and limitations under the License.
71
* The Original Code was created by Inprise Corporation
72
* and its predecessors. Portions created by Inprise Corporation are
73
* Copyright (C) Inprise Corporation.
75
* All Rights Reserved.
76
* Contributor(s): ______________________________________.
78
* 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
80
* 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define
88
* DESCRIPTION: International text support routines
90
* copyright (c) 1992, 1993 by Borland International
95
#include "../jrd/common.h"
97
#include "../jrd/jrd.h"
98
#include "../jrd/req.h"
99
#include "../jrd/val.h"
100
#include "gen/iberror.h"
101
#include "../jrd/intl.h"
102
#include "../jrd/intl_classes.h"
103
#include "../jrd/ods.h"
104
#include "../jrd/btr.h"
105
#include "../intl/charsets.h"
106
#include "../intl/country_codes.h"
107
#include "../jrd/gdsassert.h"
108
//#include "../jrd/license.h"
110
#include "../intl/ld_proto.h"
112
#include "../jrd/cvt_proto.h"
113
#include "../jrd/err_proto.h"
114
#include "../jrd/fun_proto.h"
115
#include "../jrd/gds_proto.h"
116
#include "../jrd/iberr_proto.h"
117
#include "../jrd/intl_proto.h"
118
#include "../jrd/isc_proto.h"
119
#include "../jrd/lck_proto.h"
120
#include "../jrd/met_proto.h"
121
#include "../jrd/thd.h"
122
#include "../jrd/evl_string.h"
123
#include "../jrd/intlobj_new.h"
124
#include "../jrd/jrd.h"
125
#include "../jrd/mov_proto.h"
126
#include "../jrd/IntlManager.h"
127
#include "../common/classes/init.h"
131
// IS_TEXT(x): true when the descriptor pointed to by (x) has one of the three
// string dtypes: dtype_text, dtype_varying or dtype_cstring.
// (x) is expanded up to three times, so pass an expression without side effects.
#define IS_TEXT(x) (((x)->dsc_dtype == dtype_text) ||\
132
((x)->dsc_dtype == dtype_varying)||\
133
((x)->dsc_dtype == dtype_cstring))
136
// Forward declarations of the file-local helpers defined further down.

// Tests whether a byte range consists only of the charset's space character.
static bool all_spaces(thread_db*, CHARSET_ID, const BYTE*, ULONG, ULONG);
137
// Blocking AST routine installed on collation existence locks.
static int blocking_ast_collation(void* ast_object);
138
// Fills a buffer with the charset's space character.
static void pad_spaces(thread_db*, CHARSET_ID, BYTE *, ULONG);
139
// Thin wrappers over IntlManager that fill a charset / texttype structure
// from the names and attributes carried by a SubtypeInfo.
static INTL_BOOL lookup_charset(charset* cs, const SubtypeInfo* info);
140
static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info);
143
// Classes and structures used internally to this file and intl implementation
144
// CharSetContainer pairs one character set object (returned by getCharSet())
// with the array of collations loaded for it (charset_collations).
// The member functions index that array with TTYPE_TO_COLLATION(tt_id).
// NOTE(review): this view of the class is fragmentary - braces, access
// specifiers and some members are not visible here.
class CharSetContainer
147
// Builds the container for charset cs_id described by info, allocating from pool p.
CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info);
152
// Calls destroy() on every non-null collation slot.
// NOTE(review): the enclosing member is not visible here; presumably teardown - confirm.
for (size_t i = 0; i < charset_collations.getCount(); i++)
153
if (charset_collations[i])
154
charset_collations[i]->destroy();
157
// Accessor for the wrapped character set object.
CharSet* getCharSet() { return cs; }
159
// Return (loading on demand) / unload the collation identified by tt_id.
Collation* lookupCollation(thread_db* tdbb, USHORT tt_id);
160
void unloadCollation(thread_db* tdbb, USHORT tt_id);
162
// Build a converter from this charset to charset to_cs.
CsConvert lookupConverter(thread_db* tdbb, CHARSET_ID to_cs);
164
// Find (or create and cache) the container for the charset part of ttype.
static CharSetContainer* lookupCharset(thread_db* tdbb, USHORT ttype);
165
// Allocate and initialize (but not acquire) an existence lock keyed by ttype.
static Lock* createCollationLock(thread_db* tdbb, USHORT ttype);
168
// Loaded collations; a NULL slot means "not loaded".
Firebird::Array<Collation*> charset_collations;
173
// Returns the CharSetContainer for the character set encoded in ttype.
//
// The charset id is TTYPE_TO_CHARSET(ttype); CS_dynamic is replaced by the
// attachment's charset. Containers are cached in dbb->dbb_charsets, which is
// grown to id + 10 entries when id is out of range. A new container is
// allocated from the database's permanent pool and stored in the cache.
// isc_text_subtype is posted through ERR_post on the failure path.
CharSetContainer* CharSetContainer::lookupCharset(thread_db* tdbb, USHORT ttype)
175
/**************************************
177
* l o o k u p C h a r s e t
179
**************************************
181
* Functional description
183
* Lookup a character set descriptor.
185
* First, search the appropriate vector that hangs
186
* off the dbb. If not found, then call the lower
187
* level lookup routine to allocate it, or punt
188
* if we don't know about the charset.
194
**************************************/
195
CharSetContainer *cs = NULL;
198
Database* dbb = tdbb->getDatabase();
200
USHORT id = TTYPE_TO_CHARSET(ttype);
201
if (id == CS_dynamic)
202
id = tdbb->getAttachment()->att_charset;
204
// Make sure the cache vector has a slot for this id before indexing it.
if (id >= dbb->dbb_charsets.getCount())
205
dbb->dbb_charsets.resize(id + 10);
207
cs = dbb->dbb_charsets[id];
209
// allocate a new character set object if we couldn't find one.
214
// Name preset for CS_UTF16, which short-circuits the metadata lookup below.
info.charsetName = "UTF16";
216
if ((id == CS_UTF16) || MET_get_char_coll_subtype_info(tdbb, id, &info))
218
dbb->dbb_charsets[id] = cs =
219
FB_NEW(*dbb->dbb_permanent) CharSetContainer(*dbb->dbb_permanent, id, &info);
222
ERR_post(isc_text_subtype, isc_arg_number, (ISC_STATUS) ttype, 0);
228
// Allocates a Lock from the database's permanent pool and initializes it as a
// collation existence lock: type LCK_tt_exist, key = ttype, parent = dbb_lock,
// blocking AST = blocking_ast_collation, lck_object = NULL.
// The visible code only fills in the fields; it does not acquire the lock.
Lock* CharSetContainer::createCollationLock(thread_db* tdbb, USHORT ttype)
230
/**************************************
232
* c r e a t e C o l l a t i o n L o c k
234
**************************************
236
* Functional description
237
* Create a collation lock.
239
**************************************/
240
Lock* lock = FB_NEW_RPT(*tdbb->getDatabase()->dbb_permanent, 0) Lock;
241
lock->lck_parent = tdbb->getDatabase()->dbb_lock;
242
lock->lck_dbb = tdbb->getDatabase();
243
// The lock key is the text type id itself.
lock->lck_key.lck_long = ttype;
244
lock->lck_length = sizeof(lock->lck_key.lck_long);
245
lock->lck_type = LCK_tt_exist;
246
lock->lck_owner_handle = LCK_get_owner_handle(tdbb, lock->lck_type);
247
// No object is attached here; lck_object starts out NULL.
lock->lck_object = NULL;
248
lock->lck_ast = blocking_ast_collation;
253
// Constructs the container for charset cs_id.
//
// A zero-filled charset structure is allocated from pool p and filled in by
// lookup_charset(). The CharSet instance is created only when the lookup
// succeeds and the charset carries the CHARSET_ASCII_BASED flag.
// isc_charset_not_installed, with the charset name, is posted on the failure path.
CharSetContainer::CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info) :
254
charset_collations(p),
257
charset* csL = FB_NEW(p) charset;
258
// Start from an all-zero structure before the lookup fills it in.
memset(csL, 0, sizeof(charset));
260
if (lookup_charset(csL, info) && (csL->charset_flags & CHARSET_ASCII_BASED))
261
this->cs = CharSet::createInstance(p, cs_id, csL);
265
ERR_post(isc_charset_not_installed, isc_arg_string, ERR_cstring(info->charsetName.c_str()), 0);
269
// Builds a CsConvert object converting from this container's charset to toCsId.
//
// CS_UTF16 on either side is passed to CsConvert as a NULL charset structure:
//  - target is CS_UTF16:  CsConvert(source struct, NULL)
//  - source is CS_UTF16:  CsConvert(NULL, target struct)
//  - otherwise:           CsConvert(source struct, target struct)
CsConvert CharSetContainer::lookupConverter(thread_db* tdbb, CHARSET_ID toCsId)
271
if (toCsId == CS_UTF16)
272
return CsConvert(cs->getStruct(), NULL);
275
// The target charset object is only looked up when the target is not CS_UTF16.
CharSet* toCs = INTL_charset_lookup(tdbb, toCsId);
277
if (cs->getId() == CS_UTF16)
278
return CsConvert(NULL, toCs->getStruct());
280
return CsConvert(cs->getStruct(), toCs->getStruct());
284
// Returns the Collation for text type tt_id, loading it on first use.
//
// Cached path: the slot is charset_collations[TTYPE_TO_COLLATION(tt_id)].
// If the cached collation is flagged obsolete, its existence lock (if any) is
// released, the collation is destroyed and deleted, and the slot is cleared.
//
// Load path: subtype information is read with MET_get_char_coll_subtype_info().
// When the charset is not CS_METADATA, the collation-specific attributes are
// converted from CS_METADATA into that charset. A zero-filled texttype is
// allocated from the permanent pool and filled by lookup_texttype();
// a failed lookup posts isc_collation_not_installed. A zero canonical width
// is replaced by a default, the Collation instance is created and named, and
// an existence lock is created and acquired in LCK_SR mode.
// isc_text_subtype is posted on the unknown-type path.
Collation* CharSetContainer::lookupCollation(thread_db* tdbb, USHORT tt_id)
286
const USHORT id = TTYPE_TO_COLLATION(tt_id);
288
if (id < charset_collations.getCount() && charset_collations[id] != NULL)
290
if (charset_collations[id]->obsolete)
292
if (charset_collations[id]->existenceLock)
293
LCK_release(tdbb, charset_collations[id]->existenceLock);
295
charset_collations[id]->destroy();
296
delete charset_collations[id];
297
charset_collations[id] = NULL;
300
return charset_collations[id];
304
if (MET_get_char_coll_subtype_info(tdbb, tt_id, &info))
306
CharSet* charset = INTL_charset_lookup(tdbb, TTYPE_TO_CHARSET(tt_id));
308
if (TTYPE_TO_CHARSET(tt_id) != CS_METADATA)
310
Firebird::UCharBuffer specificAttributes;
311
// Buffer size estimate: attribute byte count times the charset's maxBytesPerChar().
ULONG size = info.specificAttributes.getCount() * charset->maxBytesPerChar();
313
size = INTL_convert_bytes(tdbb, TTYPE_TO_CHARSET(tt_id),
314
specificAttributes.getBuffer(size), size,
315
CS_METADATA, info.specificAttributes.begin(),
316
info.specificAttributes.getCount(), ERR_post);
317
// Trim the buffer to the number of bytes actually produced.
specificAttributes.shrink(size);
318
info.specificAttributes = specificAttributes;
321
texttype* tt = FB_NEW(*tdbb->getDatabase()->dbb_permanent) texttype;
322
memset(tt, 0, sizeof(texttype));
324
if (!lookup_texttype(tt, &info))
327
ERR_post(isc_collation_not_installed,
328
isc_arg_string, ERR_cstring(info.collationName.c_str()),
329
isc_arg_string, ERR_cstring(info.charsetName.c_str()), 0);
332
if (charset_collations.getCount() <= id)
333
charset_collations.grow(id + 1);
335
// Canonical width and canonical function must be either both set or both unset.
fb_assert((tt->texttype_canonical_width == 0 && tt->texttype_fn_canonical == NULL) ||
336
(tt->texttype_canonical_width != 0 && tt->texttype_fn_canonical != NULL));
338
if (tt->texttype_canonical_width == 0)
340
if (charset->isMultiByte())
341
tt->texttype_canonical_width = sizeof(ULONG); // UTF-32
344
tt->texttype_canonical_width = charset->minBytesPerChar();
345
// canonical is equal to string, then TEXTTYPE_DIRECT_MATCH can be turned on
346
tt->texttype_flags |= TEXTTYPE_DIRECT_MATCH;
350
charset_collations[id] = Collation::createInstance(*tdbb->getDatabase()->dbb_permanent, tt_id, tt, charset);
351
charset_collations[id]->name = info.collationName;
353
// we don't need a lock in the charset
356
Lock* lock = charset_collations[id]->existenceLock =
357
CharSetContainer::createCollationLock(tdbb, tt_id);
358
// Attach the collation to the lock as its lck_object.
lock->lck_object = (blk*)charset_collations[id];
360
LCK_lock(tdbb, lock, LCK_SR, LCK_WAIT);
364
ERR_post(isc_text_subtype, isc_arg_number, (ISC_STATUS) tt_id, 0);
366
return charset_collations[id];
370
// Unloads the collation identified by tt_id.
//
// When the collation is loaded in this container:
//  - a non-zero useCount posts isc_no_meta_update / isc_obj_in_use with the
//    collation name;
//  - the existence lock (if any) is converted to LCK_EX, the collation is
//    flagged obsolete, then the lock is released and its pointer cleared.
// The trailing statements create a fresh collation lock, acquire it in LCK_EX
// mode and release it immediately.
// NOTE(review): presumably that is the "not loaded here" branch, used to make
// other holders of the lock react - the branch keyword is not visible; confirm.
void CharSetContainer::unloadCollation(thread_db* tdbb, USHORT tt_id)
372
const USHORT id = TTYPE_TO_COLLATION(tt_id);
374
if (id < charset_collations.getCount() && charset_collations[id] != NULL)
376
if (charset_collations[id]->useCount != 0)
378
ERR_post(isc_no_meta_update,
379
isc_arg_gds, isc_obj_in_use,
380
isc_arg_string, charset_collations[id]->name.c_str(),
384
if (charset_collations[id]->existenceLock)
385
LCK_convert_non_blocking(tdbb, charset_collations[id]->existenceLock, LCK_EX, LCK_WAIT);
387
// lookupCollation() discards and reloads a collation flagged this way.
charset_collations[id]->obsolete = true;
389
if (charset_collations[id]->existenceLock)
391
LCK_release(tdbb, charset_collations[id]->existenceLock);
392
charset_collations[id]->existenceLock = NULL;
397
Lock* lock = CharSetContainer::createCollationLock(tdbb, tt_id);
399
LCK_lock(tdbb, lock, LCK_EX, LCK_WAIT);
400
LCK_release(tdbb, lock);
407
// Fills *cs by asking IntlManager for the character set named
// info->charsetName; returns IntlManager::lookupCharSet()'s result.
static INTL_BOOL lookup_charset(charset* cs, const SubtypeInfo* info)
409
return IntlManager::lookupCharSet(info->charsetName.c_str(), cs);
413
// Fills *tt by asking IntlManager for a collation. The request is built from
// info: base collation name, charset name, attributes, the specific-attributes
// buffer with its length, and the ignoreAttributes flag.
// Returns IntlManager::lookupCollation()'s result.
static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info)
415
return IntlManager::lookupCollation(info->baseCollationName.c_str(), info->charsetName.c_str(),
416
info->attributes, info->specificAttributes.begin(),
417
info->specificAttributes.getCount(), info->ignoreAttributes, tt);
421
// Walks dbb_charsets and calls destroy() on the cached charset containers.
// NOTE(review): any null check around the destroy() call is not visible in
// this view - confirm against the full source.
void Database::destroyIntlObjects()
423
for (size_t i = 0; i < dbb_charsets.getCount(); i++)
427
dbb_charsets[i]->destroy();
434
// Maps a text type to the character set id it uses.
//
// Visible results: ttype_unicode_fss yields CS_UNICODE_FSS, one branch yields
// the attachment's charset (att_charset), and the general result is
// TTYPE_TO_CHARSET(ttype).
// NOTE(review): the switch header and the remaining case labels are not
// visible in this view.
CHARSET_ID INTL_charset(thread_db* tdbb, USHORT ttype)
436
/**************************************
438
* I N T L _ c h a r s e t
440
**************************************
442
* Functional description
443
* Return the character set ID for a piece of text.
445
**************************************/
453
case ttype_unicode_fss:
454
return (CS_UNICODE_FSS);
459
return (tdbb->getAttachment()->att_charset);
461
return (TTYPE_TO_CHARSET(ttype));
466
// Compares two text descriptors and returns the result of TextType::compare().
//
// Both operands are reduced to (text type, pointer, length) with
// CVT_get_string_ptr(). The comparison text type is MAX(t1, t2); the operand
// whose type differs from it is converted with INTL_convert_bytes() into a
// local buffer of MAX_KEY bytes, and the comparison is done by the TextType
// looked up for the comparison type. Conversion errors go through err.
int INTL_compare(thread_db* tdbb,
471
/**************************************
473
* I N T L _ c o m p a r e
475
**************************************
477
* Functional description
478
* Compare two pieces of international text.
480
**************************************/
483
fb_assert(pText1 != NULL);
484
fb_assert(pText2 != NULL);
485
fb_assert(IS_TEXT(pText1) && IS_TEXT(pText2));
486
fb_assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2));
489
/* normal compare routine from CVT_compare */
490
/* trailing spaces in strings are ignored for comparison */
494
USHORT length1 = CVT_get_string_ptr(pText1, &t1, &p1, NULL, 0, err);
498
USHORT length2 = CVT_get_string_ptr(pText2, &t2, &p2, NULL, 0, err);
500
/* YYY - by SQL II compare_type must be explicit in the
501
SQL statement if there is any doubt */
503
USHORT compare_type = MAX(t1, t2); /* YYY */
504
UCHAR buffer[MAX_KEY];
507
CHARSET_ID cs1 = INTL_charset(tdbb, t1);
508
CHARSET_ID cs2 = INTL_charset(tdbb, t2);
510
if (compare_type != t2) {
511
/* convert pText2 to pText1's type, if possible */
512
/* YYY - should failure to convert really return
514
Support joining a 437 & Latin1 Column, and we
515
pick the compare_type as 437, still only want the
517
But then, what about < operations, which make no
518
sense if the string cannot be expressed...
521
length2 = INTL_convert_bytes(tdbb, cs1,
522
buffer, sizeof(buffer),
523
cs2, p2, length2, err);
527
/* convert pText1 to pText2's type, if possible */
529
length1 = INTL_convert_bytes(tdbb, cs2,
530
buffer, sizeof(buffer),
531
cs1, p1, length1, err);
537
TextType* obj = INTL_texttype_lookup(tdbb, compare_type);
539
return obj->compare(length1, p1, length2, p2);
543
// Converts src_len bytes at src_ptr from charset src_type into dest_ptr
// (capacity dest_len) in charset dest_type; returns the resulting byte count.
//
// Both ids are first normalized through INTL_charset().
//  - If either side is CS_BINARY or CS_NONE, bytes are copied unchanged
//    (MIN(dest_len, src_len) of them). When the destination is a real charset
//    the source is checked with wellFormed() and isc_malformed_string is
//    raised through err on failure. Source bytes that do not fit must be all
//    spaces, otherwise isc_arith_except is raised through err.
//  - Otherwise the converter from INTL_convert_lookup() does the work.
// A NULL dest_ptr requests a length estimate only (see the banner below).
ULONG INTL_convert_bytes(thread_db* tdbb,
544
CHARSET_ID dest_type,
552
/**************************************
554
* I N T L _ c o n v e r t _ b y t e s
556
**************************************
558
* Functional description
559
* Given a string of bytes in one character set, convert it to another
562
* If (dest_ptr) is NULL, return the count of bytes needed to convert
563
* the string. This does not guarantee the string can be converted,
564
* the purpose of this is to allocate a large enough buffer.
567
* Length of resulting string, in bytes.
568
* calls (err) if conversion error occurs.
570
**************************************/
576
fb_assert(src_ptr != NULL);
577
fb_assert(src_type != dest_type);
578
fb_assert(err != NULL);
580
dest_type = INTL_charset(tdbb, dest_type);
581
src_type = INTL_charset(tdbb, src_type);
583
// Remember where output starts so the byte count can be computed at the end.
const UCHAR* const start_dest_ptr = dest_ptr;
585
if ((dest_type == CS_BINARY) ||
586
(dest_type == CS_NONE) ||
587
(src_type == CS_BINARY) ||
588
(src_type == CS_NONE))
590
/* See if we just need a length estimate */
591
if (dest_ptr == NULL)
594
if (dest_type != CS_BINARY && dest_type != CS_NONE)
596
CharSet* toCharSet = INTL_charset_lookup(tdbb, dest_type);
598
if (!toCharSet->wellFormed(src_len, src_ptr))
599
(*err)(isc_malformed_string, 0);
602
len = MIN(dest_len, src_len);
605
*dest_ptr++ = *src_ptr++;
608
/* See if only space characters are remaining */
609
len = src_len - MIN(dest_len, src_len);
610
if (!len || all_spaces(tdbb, src_type, src_ptr, len, 0))
611
return (dest_ptr - start_dest_ptr);
613
(*err) (isc_arith_except, 0);
615
else if (src_len == 0)
618
/* character sets are known to be different */
620
/* Do we know an object from cs1 to cs2? */
622
CsConvert cs_obj = INTL_convert_lookup(tdbb, dest_type, src_type);
623
return cs_obj.convert(src_len, src_ptr, dest_len, dest_ptr, NULL, true);
626
return (0); /* to remove compiler errors. This should never be executed */
630
// Returns a CsConvert object converting from charset from_cs to charset to_cs.
//
// CS_dynamic on either side is replaced by the attachment's charset. The
// container for from_cs is looked up and asked for a converter to to_cs.
// NOTE(review): the rest of the parameter list is not visible in this view.
CsConvert INTL_convert_lookup(thread_db* tdbb,
634
/**************************************
636
* I N T L _ c o n v e r t _ l o o k u p
638
**************************************
640
* Functional description
642
**************************************/
645
Database* dbb = tdbb->getDatabase();
648
if (from_cs == CS_dynamic)
649
from_cs = tdbb->getAttachment()->att_charset;
651
if (to_cs == CS_dynamic)
652
to_cs = tdbb->getAttachment()->att_charset;
654
/* Should from_cs == to_cs? be handled better? YYY */
656
// Asserted: neither id is still CS_dynamic after the substitution above.
fb_assert(from_cs != CS_dynamic);
657
fb_assert(to_cs != CS_dynamic);
659
CharSetContainer* charset = CharSetContainer::lookupCharset(tdbb, from_cs);
661
return charset->lookupConverter(tdbb, to_cs);
665
// Converts the text in descriptor `from` into descriptor `to` (note: the
// destination is the FIRST parameter). Errors are reported through err.
//
// The thread context comes from JRD_get_thread_data(); when there is none the
// function returns 1 without converting. For each destination layout visible
// below the same rule applies: if the two charsets differ and none of the
// CS_BINARY / CS_NONE exclusions in the condition holds, INTL_convert_bytes()
// performs the conversion; otherwise the bytes are copied after a
// wellFormed() check against the destination charset. Source bytes that do
// not fit (from_fill) must be all spaces, else isc_arith_except is raised.
// NOTE(review): the case labels and the copy loops are not visible in this
// view; the three branches appear to be the space-padded, the NUL-terminated
// and the length-prefixed (vary) layouts - confirm against the full source.
int INTL_convert_string(dsc* to, const dsc* from, FPTR_ERROR err)
667
/**************************************
669
* I N T L _ c o n v e r t _ s t r i n g
671
**************************************
673
* Functional description
674
* Convert a string from one type to another
677
* 0 if no error in conversion
678
* non-zero otherwise.
679
* CVC: Unfortunately, this function puts the source in the 2nd param,
680
* as opposed to the CVT routines, so const help mitigating coding mistakes.
682
**************************************/
684
/* Note: This function is called from outside the engine as
685
well as inside - we likely can't get rid of JRD_get_thread_data here */
686
thread_db* tdbb = JRD_get_thread_data();
687
if (tdbb == NULL) /* are we in the Engine? */
688
return (1); /* no, then can't access intl gah */
690
fb_assert(to != NULL);
691
fb_assert(from != NULL);
692
fb_assert(IS_TEXT(to) && IS_TEXT(from));
694
CHARSET_ID from_cs = INTL_charset(tdbb, INTL_TTYPE(from));
695
CHARSET_ID to_cs = INTL_charset(tdbb, INTL_TTYPE(to));
697
UCHAR* start = to->dsc_address;
700
/* Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..) */
704
const USHORT from_len =
705
CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err);
707
ULONG to_size, to_len, to_fill;
708
to_size = to_len = TEXT_LEN(to);
711
const UCHAR* q = from_ptr;
712
CharSet* toCharSet = INTL_charset_lookup(tdbb, to_cs);
715
switch (to->dsc_dtype) {
717
if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE) && (from_cs != CS_NONE)) {
719
to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
720
from_cs, from_ptr, from_len, err);
722
// Bytes left unused in the destination are padded with spaces below.
to_fill = to_size - to_len;
723
from_fill = 0; /* Convert_bytes handles source truncation */
727
/* binary string can always be converted TO by byte-copy */
729
to_len = MIN(from_len, to_size);
730
if (!toCharSet->wellFormed(to_len, q))
731
(*err)(isc_malformed_string, 0);
733
from_fill = from_len - to_len;
734
to_fill = to_size - to_len;
742
pad_spaces(tdbb, to_cs, p, to_fill);
746
if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE) && (from_cs != CS_NONE)) {
747
to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
748
from_cs, from_ptr, from_len, err);
750
// Terminate the converted text with a NUL byte.
to->dsc_address[to_len] = 0;
751
from_fill = 0; /* Convert_bytes handles source truncation */
754
/* binary string can always be converted TO by byte-copy */
756
to_len = MIN(from_len, to_size);
757
if (!toCharSet->wellFormed(to_len, q))
758
(*err)(isc_malformed_string, 0);
760
from_fill = from_len - to_len;
770
if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE) && (from_cs != CS_NONE)) {
773
INTL_convert_bytes(tdbb, to_cs,
774
(start = reinterpret_cast<UCHAR*>(((vary*) to->dsc_address)->vary_string)),
775
to_size, from_cs, from_ptr, from_len, err);
777
// Store the resulting byte length in the vary header.
((vary*) to->dsc_address)->vary_length = to_len;
778
from_fill = 0; /* Convert_bytes handles source truncation */
781
/* binary string can always be converted TO by byte-copy */
782
to_len = MIN(from_len, to_size);
783
if (!toCharSet->wellFormed(to_len, q))
784
(*err)(isc_malformed_string, 0);
786
from_fill = from_len - to_len;
787
((vary*) p)->vary_length = to_len;
788
start = p = reinterpret_cast<UCHAR*>(((vary*) p)->vary_string);
797
// For multi-byte charsets without CHARSET_LEGACY_SEMANTICS, raise
// isc_arith_except when the result holds more characters than
// to_size / maxBytesPerChar().
if (toCharSet->isMultiByte() &&
798
!(toCharSet->getFlags() & CHARSET_LEGACY_SEMANTICS) &&
799
toLength != 31 && /* allow non CHARSET_LEGACY_SEMANTICS to be used as connection charset */
800
toCharSet->length(toLength, start, false) > to_size / toCharSet->maxBytesPerChar())
802
(*err)(isc_arith_except, 0);
806
/* Make sure remaining characters on From string are spaces */
807
if (!all_spaces(tdbb, from_cs, q, from_fill, 0))
808
(*err) (isc_arith_except, 0);
814
// Reports whether the descriptor holds international text (see the banner).
// The visible logic asserts a non-NULL descriptor and tests
// !INTERNAL_TTYPE(pText).
// NOTE(review): the other conditions and the return statements are not
// visible in this view.
int INTL_data(const dsc* pText)
816
/**************************************
820
**************************************
822
* Functional description
823
* Given an input text descriptor,
824
* return TRUE if the data pointed to represents
825
* international text (subject to user defined or non-binary
826
* collation or comparison).
828
**************************************/
830
fb_assert(pText != NULL);
835
if (!INTERNAL_TTYPE(pText))
841
// Returns non-zero when INTL_data(pText) is true or when the descriptor's
// text type is ttype_binary.
int INTL_data_or_binary(const dsc* pText)
843
/**************************************
845
* I N T L _ d a t a _ o r _ b i n a r y
847
**************************************
849
* Functional description
851
**************************************/
853
return (INTL_data(pText) || (pText->dsc_ttype() == ttype_binary));
857
// Tells whether t_type is a known text type, without letting errors escape.
//
// The thread's status vector is temporarily pointed at a local array while
// INTL_texttype_lookup() is attempted, then the original vector is restored.
// NOTE(review): the error-trapping construct and the return statements are
// not visible in this view.
bool INTL_defined_type(thread_db* tdbb, USHORT t_type)
859
/**************************************
861
* I N T L _ d e f i n e d _ t y p e
863
**************************************
865
* Functional description
866
* Is (t_type) a known text type?
868
* false type is not defined.
869
* true type is defined
872
* Due to cleanup that must happen in DFW, this routine
873
* must return, and not call ERR directly.
875
**************************************/
878
// Saved so the caller's status vector can be put back after the probe.
ISC_STATUS* const original_status = tdbb->tdbb_status_vector;
883
ISC_STATUS_ARRAY local_status;
884
tdbb->tdbb_status_vector = local_status;
886
INTL_texttype_lookup(tdbb, t_type);
893
tdbb->tdbb_status_vector = original_status;
899
// INTL initialization entry point taking the thread context.
// NOTE(review): no statements of the body are visible in this view, so its
// behaviour cannot be described from here.
void INTL_init(thread_db* tdbb)
901
/**************************************
905
**************************************
907
* Functional description
909
**************************************/
913
// Computes the key length to use for an index of type idxType over text of at
// most iLength bytes.
//
// The text type is INTL_INDEX_TO_TEXT(idxType). Types up to
// ttype_last_internal use iLength directly; other types ask the TextType
// object via key_length(). The result is then capped at MAX_KEY and raised to
// at least iLength, in that order.
USHORT INTL_key_length(thread_db* tdbb, USHORT idxType, USHORT iLength)
915
/**************************************
917
* I N T L _ k e y _ l e n g t h
919
**************************************
921
* Functional description
922
* Given an index type, and a maximum length (iLength)
923
* return the length of the byte string key descriptor to
924
* use when collating text of this type.
926
**************************************/
929
fb_assert(idxType >= idx_first_intl_string);
931
const USHORT ttype = INTL_INDEX_TO_TEXT(idxType);
934
if (ttype <= ttype_last_internal)
935
key_length = iLength;
937
TextType* obj = INTL_texttype_lookup(tdbb, ttype);
938
key_length = obj->key_length(iLength);
941
/* Validity checks on the computed key_length */
943
if (key_length > MAX_KEY)
944
key_length = MAX_KEY;
946
if (key_length < iLength)
947
key_length = iLength;
953
// Returns the CharSet object for text type / charset id parm1.
// Delegates to CharSetContainer::lookupCharset() and unwraps the container
// with getCharSet().
CharSet* INTL_charset_lookup(thread_db* tdbb, USHORT parm1)
955
/**************************************
957
* I N T L _ c h a r s e t _ l o o k u p
959
**************************************
961
* Functional description
963
* Lookup a character set descriptor.
965
* First, search the appropriate vector that hangs
966
* off the dbb. If not found, then call the lower
967
* level lookup routine to allocate it, or punt
968
* if we don't know about the charset.
971
* *charset - if no errors;
974
**************************************/
975
CharSetContainer *cs = CharSetContainer::lookupCharset(tdbb, parm1);
976
return cs->getCharSet();
980
// Returns the Collation object for text type parm1.
//
// ttype_dynamic is first replaced by the text type mapped from the
// attachment's charset (MAP_CHARSET_TO_TTYPE). The charset container is then
// looked up and asked for the collation.
// NOTE(review): the rest of the parameter list is not visible in this view.
Collation* INTL_texttype_lookup(thread_db* tdbb,
983
/**************************************
985
* I N T L _ t e x t t y p e _ l o o k u p
987
**************************************
989
* Functional description
991
* Lookup either a character set descriptor or
992
* texttype descriptor object.
994
* First, search the appropriate vector that hangs
995
* off the dbb. If not found, then call the lower
996
* level lookup routine to find it in the libraries.
999
* *object - if no errors;
1000
* <never> - if error
1002
**************************************/
1004
Database* dbb = tdbb->getDatabase();
1006
if (parm1 == ttype_dynamic)
1007
parm1 = MAP_CHARSET_TO_TTYPE(tdbb->getAttachment()->att_charset);
1009
CharSetContainer* csc = CharSetContainer::lookupCharset(tdbb, parm1);
1011
return csc->lookupCollation(tdbb, parm1);
1015
// Unloads the collation for text type ttype: looks up the charset container
// and forwards to CharSetContainer::unloadCollation().
// NOTE(review): the rest of the parameter list is not visible in this view.
void INTL_texttype_unload(thread_db* tdbb,
1018
/**************************************
1020
* I N T L _ t e x t t y p e _ u n l o a d
1022
**************************************
1024
* Functional description
1025
* Unload a collation from memory.
1027
**************************************/
1030
CharSetContainer* csc = CharSetContainer::lookupCharset(tdbb, ttype);
1032
csc->unloadCollation(tdbb, ttype);
1036
// Checks whether the collation described by info can be instantiated.
//
// A zero-filled texttype is passed to lookup_texttype(). If the lookup
// succeeds and the texttype has a destroy function, that function is called
// on it right away.
// NOTE(review): the declaration of tt and the return statement are not
// visible in this view.
bool INTL_texttype_validate(Jrd::thread_db* tdbb, const SubtypeInfo* info)
1038
/**************************************
1040
* I N T L _ t e x t t y p e _ v a l i d a t e
1042
**************************************
1044
* Functional description
1045
* Check if collation attributes are valid.
1047
**************************************/
1051
memset(&tt, 0, sizeof(tt));
1053
bool ret = lookup_texttype(&tt, info);
1055
if (ret && tt.texttype_fn_destroy)
1056
tt.texttype_fn_destroy(&tt);
1062
// Public wrapper around pad_spaces(): fills `length` bytes at `string` with
// the space character of the charset derived from the descriptor's text type
// (INTL_charset(tdbb, type->dsc_ttype())).
// type must be a text descriptor and string must be non-NULL (asserted).
void INTL_pad_spaces(thread_db* tdbb, DSC * type, UCHAR * string, ULONG length)
1064
/**************************************
1066
* I N T L _ p a d _ s p a c e s
1068
**************************************
1070
* Functional description
1071
* Pad a buffer with spaces, using the character
1072
* set's defined space character.
1074
**************************************/
1077
fb_assert(type != NULL);
1078
fb_assert(IS_TEXT(type));
1079
fb_assert(string != NULL);
1081
const USHORT charset = INTL_charset(tdbb, type->dsc_ttype());
1082
pad_spaces(tdbb, charset, string, length);
1086
// Converts the string in pString into a byte-order-collatable key stored in
// pByte (a dtype_text descriptor) and yields the key length (see the banner).
//
// The index type selects the text type: idx_byte_array maps to ttype_binary,
// one case maps to ttype_metadata, and the general case uses
// INTL_INDEX_TO_TEXT(idxType). The source is first coerced to that text type
// with MOV_make_string2(). In the byte-copy branch, bytes are copied while
// both len and destLen allow, and trailing pad characters are stripped;
// in the other branch the TextType's string_to_key() builds the key.
// NOTE(review): the rest of the parameter list, several case labels and the
// return statement are not visible in this view.
USHORT INTL_string_to_key(thread_db* tdbb,
1092
/**************************************
1094
* I N T L _ s t r i n g _ t o _ k e y
1096
**************************************
1098
* Functional description
1099
* Given an input string, convert it to a byte string
1100
* that will collate naturally (byte order).
1102
* Return the length of the resulting byte string.
1104
**************************************/
1110
fb_assert(idxType >= idx_first_intl_string || idxType == idx_string
1111
|| idxType == idx_byte_array || idxType == idx_metadata);
1112
fb_assert(pString != NULL);
1113
fb_assert(pByte != NULL);
1114
fb_assert(pString->dsc_address != NULL);
1115
fb_assert(pByte->dsc_address != NULL);
1116
fb_assert(pByte->dsc_dtype == dtype_text);
1123
case idx_byte_array:
1125
ttype = ttype_binary;
1129
ttype = ttype_metadata;
1133
ttype = INTL_INDEX_TO_TEXT(idxType);
1137
/* Make a string into the proper type of text */
1141
USHORT len = MOV_make_string2(tdbb, pString, ttype, &src, temp);
1144
char* dest = reinterpret_cast<char*>(pByte->dsc_address);
1145
USHORT destLen = pByte->dsc_length;
1148
case ttype_metadata:
1152
// Copy loop bounded by both the source length and the destination capacity.
while (len-- && destLen-- > 0)
1154
/* strip off ending pad characters */
1155
while (dest > (const char*)pByte->dsc_address) {
1156
if (*(dest - 1) == pad_char)
1161
outlen = (dest - (const char*)pByte->dsc_address);
1164
TextType* obj = INTL_texttype_lookup(tdbb, ttype);
1165
outlen = obj->string_to_key(len,
1166
reinterpret_cast<const unsigned char*>(src),
1168
reinterpret_cast<unsigned char*>(dest),
1177
// Uppercases the string described by pString.
//
// The text pointer and type are obtained with CVT_get_string_ptr(), using a
// local MAX_KEY-byte buffer as scratch space and ERR_post as error handler.
// One branch uppercases byte by byte with UPPER7(); the other asks the
// TextType object to uppercase, passing the same buffer as source and
// destination.
// NOTE(review): the switch on ttype and the return statement are not visible
// in this view.
int INTL_str_to_upper(thread_db* tdbb, DSC * pString)
1179
/**************************************
1181
* I N T L _ s t r _ t o _ u p p e r
1183
**************************************
1185
* Functional description
1186
* Given an input string, convert it to uppercase
1188
**************************************/
1191
fb_assert(pString != NULL);
1192
fb_assert(pString->dsc_address != NULL);
1195
UCHAR buffer[MAX_KEY];
1198
CVT_get_string_ptr(pString, &ttype, &src,
1199
reinterpret_cast<vary*>(buffer),
1200
sizeof(buffer), ERR_post);
1205
/* cannot uppercase binary strings */
1212
*dest++ = UPPER7(*src);
1218
TextType* obj = INTL_texttype_lookup(tdbb, ttype);
1219
obj->str_to_upper(len, src, len, src); // ASF: this works for all cases? (src and dst buffers are the same)
1223
* Added to remove compiler errors. Callers are not checking
1224
* the return code from this function 4/5/95.
1230
// Lowercases the string described by pString.
//
// The text pointer and type are obtained with CVT_get_string_ptr(), using a
// local MAX_KEY-byte buffer as scratch space and ERR_post as error handler.
// One branch lowercases byte by byte with LOWWER7(); the other asks the
// TextType object to lowercase, passing the same buffer as source and
// destination.
// NOTE(review): the switch on ttype and the return statement are not visible
// in this view.
int INTL_str_to_lower(thread_db* tdbb, DSC * pString)
1232
/**************************************
1234
* I N T L _ s t r _ t o _ l o w e r
1236
**************************************
1238
* Functional description
1239
* Given an input string, convert it to lowercase
1241
**************************************/
1244
fb_assert(pString != NULL);
1245
fb_assert(pString->dsc_address != NULL);
1248
UCHAR buffer[MAX_KEY];
1251
CVT_get_string_ptr(pString, &ttype, &src,
1252
reinterpret_cast<vary*>(buffer),
1253
sizeof(buffer), ERR_post);
1258
/* cannot lowercase binary strings */
1265
*dest++ = LOWWER7(*src);
1271
TextType* obj = INTL_texttype_lookup(tdbb, ttype);
1272
obj->str_to_lower(len, src, len, src); // ASF: this works for all cases? (src and dst buffers are the same)
1276
* Added to remove compiler errors. Callers are not checking
1277
* the return code from this function 4/5/95.
1283
// Tests whether the bytes from ptr[offset] up to (not including) ptr[len]
// consist solely of the charset's space character.
//
// The space byte sequence comes from the CharSet object (getSpace() /
// getSpaceLength()). A one-byte space is compared byte by byte; a longer one
// is matched a whole sequence at a time, restarting from the beginning of the
// space sequence for every character.
// NOTE(review): the loop headers and the return statements are not visible in
// this view.
static bool all_spaces(
1286
const BYTE* ptr, ULONG len, ULONG offset)
1288
/**************************************
1290
* a l l _ s p a c e s
1292
**************************************
1294
* Functional description
1295
* determine if the string at ptr[offset] ... ptr[len] is entirely
1296
* spaces, as per the space definition of (charset).
1297
* The binary representation of a Space is character-set dependent.
1298
* (0x20 for Ascii, 0x0020 for Unicode, 0x20 for SJIS, but must watch for
1299
* 0x??20, which is NOT a space.
1300
**************************************/
1303
fb_assert(ptr != NULL);
1305
CharSet* obj = INTL_charset_lookup(tdbb, charset);
1308
* We are assuming offset points to the first byte which was not
1309
* consumed in a conversion. And that offset is pointing
1310
* to a character boundary
1313
// Single-octet character sets are optimized here
1315
if (obj->getSpaceLength() == 1) {
1316
const BYTE* p = &ptr[offset];
1317
const BYTE* const end = &ptr[len];
1319
if (*p++ != *obj->getSpace())
1325
const BYTE* p = &ptr[offset];
1326
const BYTE* const end = &ptr[len];
1327
const unsigned char* space = obj->getSpace();
1328
const unsigned char* const end_space = &space[obj->getSpaceLength()];
1330
// Rewind to the first byte of the space sequence for the next character.
space = obj->getSpace();
1331
while (p < end && space < end_space) {
1332
if (*p++ != *space++)
1341
// Blocking AST for collation existence locks; ast_object is the Collation
// that the lock's lck_object points to.
//
// If the collation exists and its useCount is zero it is flagged obsolete.
// If it also holds an existence lock, a temporary thread_db context is set up
// (database and attachment taken from the lock, no request or transaction),
// the lock is released, the collation's lock pointer is cleared, and the
// previous thread context is restored.
// NOTE(review): the return statement is not visible in this view.
static int blocking_ast_collation(void* ast_object)
1343
/**************************************
1345
* b l o c k i n g _ a s t _ c o l l a t i o n
1347
**************************************
1349
* Functional description
1350
* Someone is trying to drop a collation. If there
1351
* are outstanding interests in the existence of
1352
* the collation then just mark as blocking and return.
1353
* Otherwise, mark the collation as obsolete
1354
* and release the collation existence lock.
1356
**************************************/
1357
Collation* tt = static_cast<Collation*>(ast_object);
1359
if (tt && tt->useCount == 0)
1361
tt->obsolete = true;
1363
if (tt->existenceLock)
1365
thread_db thd_context, *tdbb;
1367
// Since this routine will be called asynchronously, we must establish
1368
// a thread context.
1369
JRD_set_thread_data(tdbb, thd_context);
1371
tdbb->setDatabase(tt->existenceLock->lck_dbb);
1372
tdbb->setAttachment(tt->existenceLock->lck_attachment);
1373
tdbb->tdbb_quantum = QUANTUM;
1374
tdbb->setRequest(NULL);
1375
tdbb->setTransaction(NULL);
1376
Jrd::ContextPoolHolder context(tdbb, 0);
1378
LCK_release(tdbb, tt->existenceLock);
1379
tt->existenceLock = NULL;
1381
// Restore the prior thread context
1382
JRD_restore_thread_data();
1390
// Fills len bytes starting at ptr with the space character of `charset`.
//
// The space byte sequence comes from the CharSet object (getSpace() /
// getSpaceLength()). A one-byte space is stored byte by byte; a longer one is
// written a whole sequence at a time, restarting from the beginning of the
// space sequence for every character. The final assertion checks that the
// buffer did not end in the middle of a space sequence.
// NOTE(review): the loop headers and the closing lines of this function are
// not visible in this view.
static void pad_spaces(thread_db* tdbb, CHARSET_ID charset, BYTE* ptr, ULONG len)
1392
/**************************************
1394
* p a d _ s p a c e s
1396
**************************************
1398
* Functional description
1399
* Pad a buffer with the character set defined space character.
1401
**************************************/
1404
fb_assert(ptr != NULL);
1406
CharSet* obj = INTL_charset_lookup(tdbb, charset);
1408
/* Single-octet character sets are optimized here */
1409
if (obj->getSpaceLength() == 1) {
1410
const BYTE* const end = &ptr[len];
1412
*ptr++ = *obj->getSpace();
1415
const BYTE* const end = &ptr[len];
1416
const UCHAR* space = obj->getSpace();
1417
const UCHAR* const end_space = &space[obj->getSpaceLength()];
1419
// Rewind to the first byte of the space sequence for the next character.
space = obj->getSpace();
1420
while (ptr < end && space < end_space) {
1423
/* This fb_assert is checking that we didn't have a buffer-end
1424
* in the middle of a space character
1426
fb_assert(!(ptr == end) || (space == end_space));