2
File: UnicodeUtilities.p
4
Contains: Types, constants, prototypes for Unicode Utilities (Unicode input and text utils)
6
Version: Technology: Mac OS 9.0
7
Release: Universal Interfaces 3.4.2
9
Copyright: © 1997-2002 by Apple Computer, Inc., all rights reserved.
11
Bugs?: For bug reports, consult the following page on the World Wide Web:
14
http://www.freepascal.org/bugs.html
20
Modified for use with Free Pascal
22
Please report any bugs to <gpc@microbizz.nl>
31
unit UnicodeUtilities;
33
{$setc UNIVERSAL_INTERFACES_VERSION := $0342}
34
{$setc GAP_INTERFACES_VERSION := $0210}
36
{$ifc not defined USE_CFSTR_CONSTANT_MACROS}
37
{$setc USE_CFSTR_CONSTANT_MACROS := TRUE}
40
{$ifc defined CPUPOWERPC and defined CPUI386}
41
{$error Conflicting initial definitions for CPUPOWERPC and CPUI386}
43
{$ifc defined FPC_BIG_ENDIAN and defined FPC_LITTLE_ENDIAN}
44
{$error Conflicting initial definitions for FPC_BIG_ENDIAN and FPC_LITTLE_ENDIAN}
47
{$ifc not defined __ppc__ and defined CPUPOWERPC}
52
{$ifc not defined __i386__ and defined CPUI386}
58
{$ifc defined __ppc__ and __ppc__ and defined __i386__ and __i386__}
59
{$error Conflicting definitions for __ppc__ and __i386__}
62
{$ifc defined __ppc__ and __ppc__}
63
{$setc TARGET_CPU_PPC := TRUE}
64
{$setc TARGET_CPU_X86 := FALSE}
65
{$elifc defined __i386__ and __i386__}
66
{$setc TARGET_CPU_PPC := FALSE}
67
{$setc TARGET_CPU_X86 := TRUE}
69
{$error Neither __ppc__ nor __i386__ is defined.}
71
{$setc TARGET_CPU_PPC_64 := FALSE}
73
{$ifc defined FPC_BIG_ENDIAN}
74
{$setc TARGET_RT_BIG_ENDIAN := TRUE}
75
{$setc TARGET_RT_LITTLE_ENDIAN := FALSE}
76
{$elifc defined FPC_LITTLE_ENDIAN}
77
{$setc TARGET_RT_BIG_ENDIAN := FALSE}
78
{$setc TARGET_RT_LITTLE_ENDIAN := TRUE}
80
{$error Neither FPC_BIG_ENDIAN nor FPC_LITTLE_ENDIAN are defined.}
82
{$setc ACCESSOR_CALLS_ARE_FUNCTIONS := TRUE}
83
{$setc CALL_NOT_IN_CARBON := FALSE}
84
{$setc OLDROUTINENAMES := FALSE}
85
{$setc OPAQUE_TOOLBOX_STRUCTS := TRUE}
86
{$setc OPAQUE_UPP_TYPES := TRUE}
87
{$setc OTCARBONAPPLICATION := TRUE}
88
{$setc OTKERNEL := FALSE}
89
{$setc PM_USE_SESSION_APIS := TRUE}
90
{$setc TARGET_API_MAC_CARBON := TRUE}
91
{$setc TARGET_API_MAC_OS8 := FALSE}
92
{$setc TARGET_API_MAC_OSX := TRUE}
93
{$setc TARGET_CARBON := TRUE}
94
{$setc TARGET_CPU_68K := FALSE}
95
{$setc TARGET_CPU_MIPS := FALSE}
96
{$setc TARGET_CPU_SPARC := FALSE}
97
{$setc TARGET_OS_MAC := TRUE}
98
{$setc TARGET_OS_UNIX := FALSE}
99
{$setc TARGET_OS_WIN32 := FALSE}
100
{$setc TARGET_RT_MAC_68881 := FALSE}
101
{$setc TARGET_RT_MAC_CFM := FALSE}
102
{$setc TARGET_RT_MAC_MACHO := TRUE}
103
{$setc TYPED_FUNCTION_POINTERS := TRUE}
104
{$setc TYPE_BOOL := FALSE}
105
{$setc TYPE_EXTENDED := FALSE}
106
{$setc TYPE_LONGLONG := TRUE}
107
uses MacTypes,MacLocales,TextCommon;
113
-------------------------------------------------------------------------------------------------
114
CONSTANTS & DATA STRUCTURES for UCKeyTranslate & UCKeyboardLayout ('uchr' resource)
115
-------------------------------------------------------------------------------------------------
119
-------------------------------------------------------------------------------------------------
120
UCKeyOutput & related stuff
121
The interpretation of UCKeyOutput depends on bits 15-14.
122
If they are 01, then bits 0-13 are an index in UCKeyStateRecordsIndex (resource-wide list).
123
If they are 10, then bits 0-13 are an index in UCKeySequenceDataIndex (resource-wide list),
124
or if UCKeySequenceDataIndex is not present or the index is beyond the end of the list,
125
then bits 0-15 are a single Unicode character.
126
Otherwise, bits 0-15 are a single Unicode character; a value of 0xFFFE-0xFFFF means no character output.
128
UCKeyCharSeq is similar, but does not support indices in UCKeyStateRecordsIndex. For bits 15-14:
129
If they are 10, then bits 0-13 are an index in UCKeySequenceDataIndex (resource-wide list),
130
or if UCKeySequenceDataIndex is not present or the index is beyond the end of the list,
131
then bits 0-15 are a single Unicode character.
132
Otherwise, bits 0-15 are a single Unicode character; a value of 0xFFFE-0xFFFF means no character output.
134
-------------------------------------------------------------------------------------------------
139
{ 16-bit key output code; bits 15-14 select how the remaining bits are interpreted. }
UCKeyOutput = UInt16;
140
{ 16-bit character-sequence code; like UCKeyOutput but without indices into UCKeyStateRecordsIndex. }
UCKeyCharSeq = UInt16;
143
{ Masks for decoding a UCKeyOutput value. }
{ Bit 14 (bits 15-14 = 01): bits 0-13 are an index in UCKeyStateRecordsIndex. }
kUCKeyOutputStateIndexMask = $4000;
144
{ Bit 15 (bits 15-14 = 10): bits 0-13 are an index in UCKeySequenceDataIndex. }
kUCKeyOutputSequenceIndexMask = $8000;
145
kUCKeyOutputTestForIndexMask = $C000; { test bits 14-15 }
146
kUCKeyOutputGetIndexMask = $3FFF; { get bits 0-13 }
149
-------------------------------------------------------------------------------------------------
150
UCKeyStateRecord & related stuff
151
The UCKeyStateRecord information is used as follows. If the current state is zero,
152
output stateZeroCharData and set the state to stateZeroNextState. If the current state
153
is non-zero and there is an entry for it in stateEntryData, then output the corresponding
154
charData and set the state to nextState. Otherwise, output the state terminator from
155
UCKeyStateTerminators for the current state (or nothing if there is no UCKeyStateTerminators
156
table or it has no entry for the current state), then output stateZeroCharData and set the
157
state to stateZeroNextState.
158
-------------------------------------------------------------------------------------------------
163
{ Pointer to a UCKeyStateRecord. }
UCKeyStateRecordPtr = ^UCKeyStateRecord;
164
{ Header of one key state record: what to output, and which state to enter next, }
{ for state zero and for each non-zero state listed in stateEntryData. }
UCKeyStateRecord = record
165
stateZeroCharData: UCKeyCharSeq; { output when the current state is zero }
166
stateZeroNextState: UInt16; { state entered after stateZeroCharData is output }
167
stateEntryCount: UInt16; { number of elements that follow in stateEntryData }
168
stateEntryFormat: UInt16; { format code of the stateEntryData elements }
169
{ This is followed by an array of stateEntryCount elements }
170
{ in the specified format. Here we just show a dummy array. }
171
stateEntryData: array [0..0] of UInt32; { placeholder only; the real element count is stateEntryCount }
175
Here are the codes for entry formats currently defined.
176
Each entry maps from curState to charData and nextState.
180
{ Entry format codes; each entry maps from curState to charData and nextState. }
{ NOTE(review): presumably stored in UCKeyStateRecord.stateEntryFormat - confirm. }
kUCKeyStateEntryTerminalFormat = $0001; { see UCKeyStateEntryTerminal }
181
kUCKeyStateEntryRangeFormat = $0002; { see UCKeyStateEntryRange }
184
For UCKeyStateEntryTerminal -
185
nextState is always 0, so we don't have a field for it
190
{ Pointer to a UCKeyStateEntryTerminal. }
UCKeyStateEntryTerminalPtr = ^UCKeyStateEntryTerminal;
191
{ State entry whose nextState is always 0, so it carries no nextState field. }
UCKeyStateEntryTerminal = record
193
charData: UCKeyCharSeq; { character data this entry maps to }
197
For UCKeyStateEntryRange -
198
If curState >= curStateStart and curState <= curStateStart+curStateRange,
199
then it matches the entry, and we transform charData and nextState as follows:
200
If charData < 0xFFFE, then charData += (curState-curStateStart)*deltaMultiplier
201
If nextState != 0, then nextState += (curState-curStateStart)*deltaMultiplier
203
{ Pointer to a UCKeyStateEntryRange. }
UCKeyStateEntryRangePtr = ^UCKeyStateEntryRange;
204
{ State entry matching every curState in curStateStart..curStateStart+curStateRange. }
{ On a match, charData (when < 0xFFFE) and nextState (when <> 0) are each increased }
{ by (curState-curStateStart)*deltaMultiplier. }
{ curStateRange and deltaMultiplier are unsigned 8-bit quantities: declaring them }
{ signed would turn values above 127 negative and make the range test above fail. }
{ UInt8 keeps the same one-byte size, so the record layout is unchanged. }
UCKeyStateEntryRange = record
205
curStateStart: UInt16; { first state matched by this entry }
206
curStateRange: UInt8; { number of additional states matched after curStateStart (0..255) }
207
deltaMultiplier: UInt8; { per-state increment applied to charData and nextState (0..255) }
208
charData: UCKeyCharSeq; { character data for curStateStart, before the delta is applied }
213
-------------------------------------------------------------------------------------------------
214
UCKeyboardLayout & related stuff
215
The UCKeyboardLayout struct given here is only for the resource header. It specifies
216
offsets to the various subtables which each have their own structs, given below.
217
The keyboardTypeList array selects table offsets that depend on keyboardType. The
218
first entry in keyboardTypeList is the default entry, which will be used if the
219
keyboardType passed to UCKeyTranslate does not match any other entry - i.e. does not fall
220
within the range keyboardTypeFirst..keyboardTypeLast for some entry. The first entry
221
should have keyboardTypeFirst = keyboardTypeLast = 0.
222
-------------------------------------------------------------------------------------------------
224
{ Pointer to a UCKeyboardTypeHeader. }
UCKeyboardTypeHeaderPtr = ^UCKeyboardTypeHeader;
225
{ One entry of the keyboard-type list: the subtable offsets to use for every }
{ keyboardType in the range keyboardTypeFirst..keyboardTypeLast. }
UCKeyboardTypeHeader = record
226
keyboardTypeFirst: UInt32; { first keyboardType in this entry }
227
keyboardTypeLast: UInt32; { last keyboardType in this entry }
228
keyModifiersToTableNumOffset: ByteOffset; { required }
229
keyToCharTableIndexOffset: ByteOffset; { required }
230
keyStateRecordsIndexOffset: ByteOffset; { 0 => no table }
231
keyStateTerminatorsOffset: ByteOffset; { 0 => no table }
232
keySequenceDataIndexOffset: ByteOffset; { 0 => no table }
235
{ Pointer to a UCKeyboardLayout. }
UCKeyboardLayoutPtr = ^UCKeyboardLayout;
236
{ Resource header of a keyboard layout; the subtables are reached through offsets. }
UCKeyboardLayout = record
237
{ header only; other tables accessed via offsets }
238
keyLayoutHeaderFormat: UInt16; { =kUCKeyLayoutHeaderFormat }
239
keyLayoutDataVersion: UInt16; { 0x0100 = 1.0, 0x0110 = 1.1, etc. }
240
keyLayoutFeatureInfoOffset: ByteOffset; { may be 0 }
241
keyboardTypeCount: ItemCount; { Dimension for keyboardTypeList[] }
242
keyboardTypeList: array [0..0] of UCKeyboardTypeHeader; { dummy bound; holds keyboardTypeCount entries, the first being the default entry }
245
{ ------------------------------------------------------------------------------------------------- }
246
{ Pointer to a UCKeyLayoutFeatureInfo. }
UCKeyLayoutFeatureInfoPtr = ^UCKeyLayoutFeatureInfo;
247
{ Feature information for a keyboard layout. }
{ NOTE(review): presumably the table located by UCKeyboardLayout.keyLayoutFeatureInfoOffset - confirm. }
UCKeyLayoutFeatureInfo = record
248
keyLayoutFeatureInfoFormat: UInt16; { =kUCKeyLayoutFeatureInfoFormat }
250
maxOutputStringLength: UniCharCount; { longest possible output string }
253
{ ------------------------------------------------------------------------------------------------- }
254
{ Pointer to a UCKeyModifiersToTableNum. }
UCKeyModifiersToTableNumPtr = ^UCKeyModifiersToTableNum;
255
{ Maps modifier-key combinations to a table number; combinations not covered by }
{ tableNum[] use defaultTableNum. }
UCKeyModifiersToTableNum = record
256
keyModifiersToTableNumFormat: UInt16; { =kUCKeyModifiersToTableNumFormat }
257
defaultTableNum: UInt16; { For modifier combos not in tableNum[] }
258
modifiersCount: ItemCount; { Dimension for tableNum[] }
260
{ Then there is padding to a 4-byte boundary with bytes containing 0, if necessary. }
263
{ ------------------------------------------------------------------------------------------------- }
264
{ Pointer to a UCKeyToCharTableIndex. }
UCKeyToCharTableIndexPtr = ^UCKeyToCharTableIndex;
265
{ Index of the key-to-character tables: keyToCharTableCount offsets, each leading to }
{ a table of keyToCharTableSize UCKeyOutput values. }
UCKeyToCharTableIndex = record
266
keyToCharTableIndexFormat: UInt16; { =kUCKeyToCharTableIndexFormat }
267
keyToCharTableSize: UInt16; { Max keyCode (128 for ADB keyboards) }
268
keyToCharTableCount: ItemCount; { Dimension for keyToCharTableOffsets[] (usually 6 to 12 tables) }
269
keyToCharTableOffsets: array [0..0] of ByteOffset; { dummy bound; holds keyToCharTableCount offsets }
270
{ Each offset in keyToCharTableOffsets is from the beginning of the resource to a }
271
{ table as follows: }
272
{ UCKeyOutput keyToCharData[keyToCharTableSize]; }
273
{ These tables follow the UCKeyToCharTableIndex. }
274
{ Then there is padding to a 4-byte boundary with bytes containing 0, if necessary. }
277
{ ------------------------------------------------------------------------------------------------- }
278
{ Pointer to a UCKeyStateRecordsIndex. }
UCKeyStateRecordsIndexPtr = ^UCKeyStateRecordsIndex;
279
{ Resource-wide index of UCKeyStateRecords, addressed by the low 14 bits of a }
{ UCKeyOutput whose bits 15-14 are 01. }
UCKeyStateRecordsIndex = record
280
keyStateRecordsIndexFormat: UInt16; { =kUCKeyStateRecordsIndexFormat }
281
keyStateRecordCount: UInt16; { Dimension for keyStateRecordOffsets[] }
282
keyStateRecordOffsets: array [0..0] of ByteOffset; { dummy bound; holds keyStateRecordCount offsets }
283
{ Each offset in keyStateRecordOffsets is from the beginning of the resource to a }
284
{ UCKeyStateRecord. These UCKeyStateRecords follow the keyStateRecordOffsets[] array. }
285
{ Then there is padding to a 4-byte boundary with bytes containing 0, if necessary. }
288
{ ------------------------------------------------------------------------------------------------- }
289
{ Pointer to a UCKeyStateTerminators. }
UCKeyStateTerminatorsPtr = ^UCKeyStateTerminators;
290
{ Terminator output for each non-zero state; the terminator is output when the }
{ current state has no matching entry in a UCKeyStateRecord. }
UCKeyStateTerminators = record
291
keyStateTerminatorsFormat: UInt16; { =kUCKeyStateTerminatorsFormat }
292
keyStateTerminatorCount: UInt16; { Dimension for keyStateTerminators[] (# of nonzero states) }
293
keyStateTerminators: array [0..0] of UCKeyCharSeq; { dummy bound; holds keyStateTerminatorCount entries }
294
{ Note: keyStateTerminators[0] is terminator for state 1, etc. }
295
{ Then there is padding to a 4-byte boundary with bytes containing 0, if necessary. }
298
{ ------------------------------------------------------------------------------------------------- }
299
{ Pointer to a UCKeySequenceDataIndex. }
UCKeySequenceDataIndexPtr = ^UCKeySequenceDataIndex;
300
{ Resource-wide index of UniChar sequences, addressed by the low 14 bits of a }
{ UCKeyOutput or UCKeyCharSeq whose bits 15-14 are 10. }
UCKeySequenceDataIndex = record
301
keySequenceDataIndexFormat: UInt16; { =kUCKeySequenceDataIndexFormat }
302
charSequenceCount: UInt16; { Dimension of charSequenceOffsets[] is charSequenceCount+1 }
303
charSequenceOffsets: array [0..0] of UInt16; { dummy bound; the extra final offset marks the end of the last sequence }
304
{ Each offset in charSequenceOffsets is in bytes, from the beginning of }
305
{ UCKeySequenceDataIndex to a sequence of UniChars; the next offset indicates the }
306
{ end of the sequence. The UniChar sequences follow the UCKeySequenceDataIndex. }
307
{ Then there is padding to a 4-byte boundary with bytes containing 0, if necessary. }
310
{ ------------------------------------------------------------------------------------------------- }
311
{ Current format codes for the various tables (bits 12-15 indicate which table) }
315
{ Format codes stored in the first field of each table. }
{ Bits 12-15 (the top hex digit) identify the table; the low bits differ only for the header ($1002). }
kUCKeyLayoutHeaderFormat = $1002; { UCKeyboardLayout.keyLayoutHeaderFormat }
316
kUCKeyLayoutFeatureInfoFormat = $2001; { UCKeyLayoutFeatureInfo.keyLayoutFeatureInfoFormat }
317
kUCKeyModifiersToTableNumFormat = $3001; { UCKeyModifiersToTableNum.keyModifiersToTableNumFormat }
318
kUCKeyToCharTableIndexFormat = $4001; { UCKeyToCharTableIndex.keyToCharTableIndexFormat }
319
kUCKeyStateRecordsIndexFormat = $5001; { UCKeyStateRecordsIndex.keyStateRecordsIndexFormat }
320
kUCKeyStateTerminatorsFormat = $6001; { UCKeyStateTerminators.keyStateTerminatorsFormat }
321
kUCKeySequenceDataIndexFormat = $7001; { UCKeySequenceDataIndex.keySequenceDataIndexFormat }
325
-------------------------------------------------------------------------------------------------
326
Constants for keyAction parameter in UCKeyTranslate()
327
-------------------------------------------------------------------------------------------------
330
{ Values for the keyAction parameter of UCKeyTranslate. }
kUCKeyActionDown = 0; { key is going down }
331
kUCKeyActionUp = 1; { key is going up }
332
kUCKeyActionAutoKey = 2; { auto-key down }
333
kUCKeyActionDisplay = 3; { get information for key display (as in Key Caps) }
336
-------------------------------------------------------------------------------------------------
337
Bit assignments & masks for keyTranslateOptions parameter in UCKeyTranslate()
338
-------------------------------------------------------------------------------------------------
341
{ Bit number and matching mask for the keyTranslateOptions parameter of UCKeyTranslate. }
kUCKeyTranslateNoDeadKeysBit = 0; { Prevents setting any new dead-key states }
343
kUCKeyTranslateNoDeadKeysMask = $00000001; { mask for bit kUCKeyTranslateNoDeadKeysBit (1 shl 0) }
346
-------------------------------------------------------------------------------------------------
347
CONSTANTS & DATA STRUCTURES for Unicode Collation
348
-------------------------------------------------------------------------------------------------
350
{ constant for LocaleOperationClass }
351
kUnicodeCollationClass = FourCharCode('ucol'); { four-character code 'ucol' }
355
CollatorRef = ^SInt32; { an opaque 32-bit type }
356
CollatorRefPtr = ^CollatorRef; { when a var xx:CollatorRef parameter can be nil, it is changed to xx: CollatorRefPtr }
357
UCCollateOptions = UInt32; { option bits: kUCCollate...Mask values, with the invariant order type in the high 8 bits }
359
{ Bit masks for UCCollateOptions. }
{ Sensitivity options }
360
kUCCollateComposeInsensitiveMask = $00000002; { bit 1 }
361
kUCCollateWidthInsensitiveMask = $00000004; { bit 2 }
362
kUCCollateCaseInsensitiveMask = $00000008; { bit 3 }
363
{ Other general options }
kUCCollateDiacritInsensitiveMask = $00000010; { bit 4; last of the sensitivity options }
364
{ Number-handling options }
kUCCollatePunctuationSignificantMask = $00008000; { bit 15; the only "other general option" }
365
kUCCollateDigitsOverrideMask = $00010000; { bit 16 }
366
kUCCollateDigitsAsNumberMask = $00020000; { bit 17 }
368
kUCCollateStandardOptions = $00000006; { = kUCCollateComposeInsensitiveMask or kUCCollateWidthInsensitiveMask }
371
Special values to specify various invariant orders for UCCompareTextNoLocale.
372
These values use the high 8 bits of UCCollateOptions.
374
{ Invariant order type for UCCompareTextNoLocale; carried in the high 8 bits of UCCollateOptions. }
kUCCollateTypeHFSExtended = 1;
376
{ These constants are used for masking and shifting the invariant order type. }
377
kUCCollateTypeSourceMask = $000000FF; { order type before shifting: low 8 bits }
378
kUCCollateTypeShiftBits = 24; { shift that moves the order type into the high 8 bits }
380
kUCCollateTypeMask = $FF000000; { = kUCCollateTypeSourceMask shl kUCCollateTypeShiftBits }
384
{ One 32-bit element of a collation key (see the collationKey / key1Ptr / key2Ptr parameters). }
UCCollationValue = UInt32;
385
{ Pointer to a UCCollationValue, used to pass collation-key buffers. }
UCCollationValuePtr = ^UCCollationValue;
387
-------------------------------------------------------------------------------------------------
388
CONSTANTS & DATA STRUCTURES for Unicode TextBreak
389
-------------------------------------------------------------------------------------------------
391
{ constant for LocaleOperationClass }
394
kUnicodeTextBreakClass = FourCharCode('ubrk'); { four-character code 'ubrk' }
398
TextBreakLocatorRef = ^SInt32; { an opaque 32-bit type }
399
TextBreakLocatorRefPtr = ^TextBreakLocatorRef; { when a var xx:TextBreakLocatorRef parameter can be nil, it is changed to xx: TextBreakLocatorRefPtr }
400
UCTextBreakType = UInt32; { bit set of text break types }
402
{ Text break type masks. }
{ NOTE(review): presumably the values for UCTextBreakType, which is declared just before them - confirm. }
kUCTextBreakCharMask = $00000001; { bit 0 }
403
kUCTextBreakClusterMask = $00000004; { bit 2 }
404
kUCTextBreakWordMask = $00000010; { bit 4 }
405
kUCTextBreakLineMask = $00000040; { bit 6 }
409
{ Option bits for locating a text break. }
{ NOTE(review): presumably the type of the options parameter of UCFindTextBreak, with the masks below as its values - confirm. }
UCTextBreakOptions = UInt32;
411
kUCTextBreakLeadingEdgeMask = $00000001; { bit 0 }
412
kUCTextBreakGoBackwardsMask = $00000002; { bit 1 }
413
kUCTextBreakIterateMask = $00000004; { bit 2 }
416
-------------------------------------------------------------------------------------------------
418
-------------------------------------------------------------------------------------------------
425
* Non-Carbon CFM: in UnicodeUtilitiesCoreLib 8.5 and later
426
* CarbonLib: in CarbonLib 1.0 and later
427
* Mac OS X: in version 10.0 and later
429
{
  UCKeyTranslate - binding for the external symbol '_UCKeyTranslate'.

    keyLayoutPtr        : UCKeyboardLayout header, passed by reference (const).
    keyAction           : one of the kUCKeyAction constants.
    keyboardType        : selects the UCKeyboardTypeHeader entry whose range
                          keyboardTypeFirst..keyboardTypeLast contains it; the
                          first (default) entry is used when none matches.
    keyTranslateOptions : option bits; see kUCKeyTranslateNoDeadKeysMask.
    deadKeyState,
    actualStringLength  : var parameters. NOTE(review): presumably updated by
                          the call - confirm against Apple's documentation.
    maxStringLength,
    unicodeString       : NOTE(review): presumably the capacity of, and the
                          pointer to, the caller's output buffer - confirm.

  Returns an OSStatus.
}
function UCKeyTranslate(
  const (*var*) keyLayoutPtr: UCKeyboardLayout;
  virtualKeyCode, keyAction: UInt16;
  modifierKeyState, keyboardType: UInt32;
  keyTranslateOptions: OptionBits;
  var deadKeyState: UInt32;
  maxStringLength: UniCharCount;
  var actualStringLength: UniCharCount;
  unicodeString: UniCharPtr
): OSStatus; external name '_UCKeyTranslate';
431
{ Standard collation functions }
436
* Non-Carbon CFM: in UnicodeUtilitiesLib 8.6 and later
437
* CarbonLib: in CarbonLib 1.0 and later
438
* Mac OS X: in version 10.0 and later
440
{
  UCCreateCollator - binding for the external symbol '_UCCreateCollator'.

    locale, opVariant : locale and operation variant for the collator.
    options           : UCCollateOptions bit set (kUCCollate...Mask values).
    collatorRef_      : var parameter. NOTE(review): presumably receives the
                        new collator reference - confirm against Apple's docs.

  Returns an OSStatus.
}
function UCCreateCollator(
  locale: LocaleRef;
  opVariant: LocaleOperationVariant;
  options: UCCollateOptions;
  var collatorRef_: CollatorRef
): OSStatus; external name '_UCCreateCollator';
443
* UCGetCollationKey()
446
* Non-Carbon CFM: in UnicodeUtilitiesLib 8.6 and later
447
* CarbonLib: in CarbonLib 1.0 and later
448
* Mac OS X: in version 10.0 and later
450
{
  UCGetCollationKey - binding for the external symbol '_UCGetCollationKey'.

    collatorRef_        : collator reference.
    textPtr, textLength : UniChar text and its length as a UniCharCount.
    maxKeySize,
    collationKey        : NOTE(review): presumably the capacity of, and the
                          pointer to, a caller-supplied UCCollationValue
                          buffer - confirm.
    actualKeySize       : var parameter. NOTE(review): presumably set to the
                          size of the key produced - confirm.

  Returns an OSStatus.
}
function UCGetCollationKey(
  collatorRef_: CollatorRef;
  textPtr: ConstUniCharPtr;
  textLength: UniCharCount;
  maxKeySize: ItemCount;
  var actualKeySize: ItemCount;
  collationKey: UCCollationValuePtr
): OSStatus; external name '_UCGetCollationKey';
453
* UCCompareCollationKeys()
456
* Non-Carbon CFM: in UnicodeUtilitiesCoreLib 8.6 and later
457
* CarbonLib: in CarbonLib 1.0 and later
458
* Mac OS X: in version 10.0 and later
460
{
  UCCompareCollationKeys - binding for the external symbol
  '_UCCompareCollationKeys'.

    key1Ptr, key1Length : first collation key and its length as an ItemCount.
    key2Ptr, key2Length : second collation key and its length as an ItemCount.
    equivalent, order   : var parameters. NOTE(review): presumably receive the
                          equivalence flag and relative order of the two
                          keys - confirm against Apple's documentation.

  Returns an OSStatus.
}
function UCCompareCollationKeys(
  key1Ptr: UCCollationValuePtr;
  key1Length: ItemCount;
  key2Ptr: UCCollationValuePtr;
  key2Length: ItemCount;
  var equivalent: boolean;
  var order: SInt32
): OSStatus; external name '_UCCompareCollationKeys';
466
* Non-Carbon CFM: in UnicodeUtilitiesLib 8.6 and later
467
* CarbonLib: in CarbonLib 1.0 and later
468
* Mac OS X: in version 10.0 and later
470
{
  UCCompareText - binding for the external symbol '_UCCompareText'.

    collatorRef_          : collator reference.
    text1Ptr, text1Length : first UniChar text and its length.
    text2Ptr, text2Length : second UniChar text and its length.
    equivalent, order     : var parameters. NOTE(review): presumably receive
                            the equivalence flag and relative order of the two
                            texts - confirm against Apple's documentation.

  Returns an OSStatus.
}
function UCCompareText(
  collatorRef_: CollatorRef;
  text1Ptr: ConstUniCharPtr;
  text1Length: UniCharCount;
  text2Ptr: ConstUniCharPtr;
  text2Length: UniCharCount;
  var equivalent: boolean;
  var order: SInt32
): OSStatus; external name '_UCCompareText';
473
* UCDisposeCollator()
476
* Non-Carbon CFM: in UnicodeUtilitiesLib 8.6 and later
477
* CarbonLib: in CarbonLib 1.0 and later
478
* Mac OS X: in version 10.0 and later
480
{
  UCDisposeCollator - binding for the external symbol '_UCDisposeCollator'.

    collatorRef_ : collator reference, passed by reference (var).
                   NOTE(review): presumably invalidated by the call - confirm
                   against Apple's documentation.

  Returns an OSStatus.
}
function UCDisposeCollator(
  var collatorRef_: CollatorRef
): OSStatus; external name '_UCDisposeCollator';
482
{ Simple collation using default locale }
485
* UCCompareTextDefault()
488
* Non-Carbon CFM: in UnicodeUtilitiesLib 8.6 and later
489
* CarbonLib: in CarbonLib 1.0 and later
490
* Mac OS X: in version 10.0 and later
492
{
  UCCompareTextDefault - binding for the external symbol
  '_UCCompareTextDefault'. Takes collation options directly instead of a
  CollatorRef.

    options               : UCCollateOptions bit set (kUCCollate...Mask values).
    text1Ptr, text1Length : first UniChar text and its length.
    text2Ptr, text2Length : second UniChar text and its length.
    equivalent, order     : var parameters. NOTE(review): presumably receive
                            the equivalence flag and relative order of the two
                            texts - confirm against Apple's documentation.

  Returns an OSStatus.
}
function UCCompareTextDefault(
  options: UCCollateOptions;
  text1Ptr: ConstUniCharPtr;
  text1Length: UniCharCount;
  text2Ptr: ConstUniCharPtr;
  text2Length: UniCharCount;
  var equivalent: boolean;
  var order: SInt32
): OSStatus; external name '_UCCompareTextDefault';
495
{ Simple locale-independent collation }
498
* UCCompareTextNoLocale()
501
* Non-Carbon CFM: in UnicodeUtilitiesCoreLib 8.6 and later
502
* CarbonLib: in CarbonLib 1.0 and later
503
* Mac OS X: in version 10.0 and later
505
{
  UCCompareTextNoLocale - binding for the external symbol
  '_UCCompareTextNoLocale'.

    options               : UCCollateOptions; the invariant order type is
                            carried in the high 8 bits (see kUCCollateTypeMask
                            and kUCCollateTypeHFSExtended).
    text1Ptr, text1Length : first UniChar text and its length.
    text2Ptr, text2Length : second UniChar text and its length.
    equivalent, order     : var parameters. NOTE(review): presumably receive
                            the equivalence flag and relative order of the two
                            texts - confirm against Apple's documentation.

  Returns an OSStatus.
}
function UCCompareTextNoLocale(
  options: UCCollateOptions;
  text1Ptr: ConstUniCharPtr;
  text1Length: UniCharCount;
  text2Ptr: ConstUniCharPtr;
  text2Length: UniCharCount;
  var equivalent: boolean;
  var order: SInt32
): OSStatus; external name '_UCCompareTextNoLocale';
507
{ Standard text break (text boundary) functions }
509
* UCCreateTextBreakLocator()
512
* Non-Carbon CFM: in UnicodeUtilitiesLib 9.0 and later
513
* CarbonLib: in CarbonLib 1.0 and later
514
* Mac OS X: in version 10.0 and later
516
{
  UCCreateTextBreakLocator - binding for the external symbol
  '_UCCreateTextBreakLocator'.

    locale, opVariant : locale and operation variant for the locator.
    breakTypes        : UCTextBreakType bit set.
    breakRef          : var parameter. NOTE(review): presumably receives the
                        new locator reference - confirm against Apple's docs.

  Returns an OSStatus.
}
function UCCreateTextBreakLocator(
  locale: LocaleRef;
  opVariant: LocaleOperationVariant;
  breakTypes: UCTextBreakType;
  var breakRef: TextBreakLocatorRef
): OSStatus; external name '_UCCreateTextBreakLocator';
522
* Non-Carbon CFM: in UnicodeUtilitiesLib 9.0 and later
523
* CarbonLib: in CarbonLib 1.0 and later
524
* Mac OS X: in version 10.0 and later
526
{
  UCFindTextBreak - binding for the external symbol '_UCFindTextBreak'.

    breakRef            : text break locator reference.
    breakType           : UCTextBreakType value.
    options             : UCTextBreakOptions bit set.
    textPtr, textLength : UniChar text and its length.
    startOffset         : offset into the text, as a UniCharArrayOffset.
    breakOffset         : var parameter. NOTE(review): presumably receives the
                          offset of the break that was found - confirm against
                          Apple's documentation.

  Returns an OSStatus.
}
function UCFindTextBreak(
  breakRef: TextBreakLocatorRef;
  breakType: UCTextBreakType;
  options: UCTextBreakOptions;
  textPtr: ConstUniCharPtr;
  textLength: UniCharCount;
  startOffset: UniCharArrayOffset;
  var breakOffset: UniCharArrayOffset
): OSStatus; external name '_UCFindTextBreak';
529
* UCDisposeTextBreakLocator()
532
* Non-Carbon CFM: in UnicodeUtilitiesLib 9.0 and later
533
* CarbonLib: in CarbonLib 1.0 and later
534
* Mac OS X: in version 10.0 and later
536
{
  UCDisposeTextBreakLocator - binding for the external symbol
  '_UCDisposeTextBreakLocator'.

    breakRef : text break locator reference, passed by reference (var).
               NOTE(review): presumably invalidated by the call - confirm
               against Apple's documentation.

  Returns an OSStatus.
}
function UCDisposeTextBreakLocator(
  var breakRef: TextBreakLocatorRef
): OSStatus; external name '_UCDisposeTextBreakLocator';