2
*******************************************************************************
4
* Copyright (C) 2004-2010, International Business Machines
5
* Corporation and others. All Rights Reserved.
7
*******************************************************************************
10
* tab size: 8 (not used)
13
* created on: 2004oct06
14
* created by: Markus W. Scherer
22
* \brief C API: Abstract Unicode Text API
24
* The Text Access API provides a means to allow text that is stored in alternative
25
* formats to work with ICU services. ICU normally operates on text that is
26
* stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
27
* UnicodeString for C++ APIs.
29
* ICU Text Access allows other formats, such as UTF-8 or non-contiguous
30
* UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
32
* There are three general classes of usage for UText:
34
* Application Level Use. This is the simplest usage - applications would
35
* use one of the utext_open() functions on their input text, and pass
36
* the resulting UText to the desired ICU service.
38
* Second is usage in ICU Services, such as break iteration, that will need to
39
* operate on input presented to them as a UText. These implementations
40
* will need to use the iteration and related UText functions to gain
41
* access to the actual text.
43
* The third class of UText users are "text providers." These are the
44
* UText implementations for the various text storage formats. An application
45
* or system with a unique text storage format can implement a set of
46
* UText provider functions for that format, which will then allow
47
* ICU services to operate on that format.
50
* <em>Iterating over text</em>
52
* Here is sample code for a forward iteration over the contents of a UText
56
* UText *ut = whatever();
58
* for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
59
* // do whatever with the codepoint c here.
63
* And here is similar code to iterate in the reverse direction, from the end
64
* of the text towards the beginning.
68
* UText *ut = whatever();
69
* int64_t textLength = utext_nativeLength(ut);
70
* for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
71
* // do whatever with the codepoint c here.
75
* <em>Characters and Indexing</em>
77
* Indexing into text by UText functions is nearly always in terms of the native
78
* indexing of the underlying text storage. The storage format could be UTF-8
79
* or UTF-32, for example. When coding to the UText access API, no assumptions
80
* can be made regarding the size of characters, or how far an index
81
* may move when iterating between characters.
83
* All indices supplied to UText functions are pinned to the length of the
84
* text. An out-of-bounds index is not considered to be an error, but is
85
* adjusted to be in the range 0 <= index <= length of input text.
88
* When an index position is returned from a UText function, it will be
89
* a native index to the underlying text. In the case of multi-unit characters,
90
* it will always refer to the first position of the character,
91
* never to the interior. This is essentially the same thing as saying that
92
* a returned index will always point to a boundary between characters.
94
* When a native index is supplied to a UText function, all indices that
95
* refer to any part of a multi-unit character representation are considered
96
* to be equivalent. In the case of multi-unit characters, an incoming index
97
* will be logically normalized to refer to the start of the character.
99
* It is possible to test whether a native index is on a code point boundary
100
* by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
101
* If the index is returned unchanged, it was on a code point boundary. If
102
* an adjusted index is returned, the original index referred to the
103
* interior of a character.
105
* <em>Conventions for calling UText functions</em>
107
* Most UText access functions have as their first parameter a (UText *) pointer,
108
* which specifies the UText to be used. Unless otherwise noted, the
109
* pointer must refer to a valid, open UText. Attempting to
110
* use a closed UText or passing a NULL pointer is a programming error and
111
* will produce undefined results or NULL pointer exceptions.
113
* The utext_open() family of functions can either open an existing (closed)
114
* UText, or heap allocate a new UText. Here is sample code for creating
115
* a stack-allocated UText.
118
* char *s = whatever(); // A utf-8 string
119
* UErrorCode status = U_ZERO_ERROR;
120
* UText ut = UTEXT_INITIALIZER;
121
* utext_openUTF8(&ut, s, -1, &status);
122
* if (U_FAILURE(status)) {
125
* // work with the UText
129
* Any existing UText passed to an open function _must_ have been initialized,
130
* either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
131
* by an open function. Passing NULL will cause the open function to
132
* heap-allocate and fully initialize a new UText.
138
#include "unicode/utypes.h"
139
#include "unicode/uchar.h"
140
#if U_SHOW_CPLUSPLUS_API
141
#include "unicode/localpointer.h"
142
#include "unicode/rep.h"
143
#include "unicode/unistr.h"
144
#include "unicode/chariter.h"
151
typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
154
/***************************************************************************************
156
* C Functions for creating UText wrappers around various kinds of text strings.
158
****************************************************************************************/
162
* Close function for UText instances.
163
* Cleans up, releases any resources being held by an open UText.
165
* If the UText was originally allocated by one of the utext_open functions,
166
* the storage associated with the utext will also be freed.
167
* If the UText storage originated with the application, as it would with
168
* a local or static instance, the storage will not be deleted.
170
* An open UText can be reset to refer to new string by using one of the utext_open()
171
* functions without first closing the UText.
173
* @param ut The UText to be closed.
174
* @return NULL if the UText struct was deleted by the close. If the UText struct
175
* was originally provided by the caller to the open function, it is
176
* returned by this function, and may be safely used again in
177
* a subsequent utext_open.
181
U_STABLE UText * U_EXPORT2
182
utext_close(UText *ut);
184
#if U_SHOW_CPLUSPLUS_API
189
* \class LocalUTextPointer
190
* "Smart pointer" class, closes a UText via utext_close().
191
* For most methods see the LocalPointerBase base class.
193
* @see LocalPointerBase
197
U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
204
* Open a read-only UText implementation for UTF-8 strings.
207
* Any invalid UTF-8 in the input will be handled in this way:
208
* a sequence of bytes that has the form of a truncated, but otherwise valid,
209
* UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD.
210
* Any other illegal bytes will each be replaced by a \uFFFD.
213
* @param ut Pointer to a UText struct. If NULL, a new UText will be created.
214
* If non-NULL, must refer to an initialized UText struct, which will then
215
* be reset to reference the specified UTF-8 string.
216
* @param s A UTF-8 string. Must not be NULL.
217
* @param length The length of the UTF-8 string in bytes, or -1 if the string is
219
* @param status Errors are returned here.
220
* @return A pointer to the UText. If a pre-allocated UText was provided, it
221
* will always be used and returned.
224
U_STABLE UText * U_EXPORT2
225
utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
229
* Open a read-only UText for UChar * string.
231
* @param ut Pointer to a UText struct. If NULL, a new UText will be created.
232
* If non-NULL, must refer to an initialized UText struct, which will then
233
* be reset to reference the specified UChar string.
234
* @param s A UChar (UTF-16) string
235
* @param length The number of UChars in the input string, or -1 if the string is
237
* @param status Errors are returned here.
238
* @return A pointer to the UText. If a pre-allocated UText was provided, it
239
* will always be used and returned.
242
U_STABLE UText * U_EXPORT2
243
utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
246
#if U_SHOW_CPLUSPLUS_API
248
* Open a writable UText for a non-const UnicodeString.
250
* @param ut Pointer to a UText struct. If NULL, a new UText will be created.
251
* If non-NULL, must refer to an initialized UText struct, which will then
252
* be reset to reference the specified input string.
253
* @param s A UnicodeString.
254
* @param status Errors are returned here.
255
* @return Pointer to the UText. If a UText was supplied as input, this
256
* will always be used and returned.
259
U_STABLE UText * U_EXPORT2
260
utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
264
* Open a UText for a const UnicodeString. The resulting UText will not be writable.
266
* @param ut Pointer to a UText struct. If NULL, a new UText will be created.
267
* If non-NULL, must refer to an initialized UText struct, which will then
268
* be reset to reference the specified input string.
269
* @param s A const UnicodeString to be wrapped.
270
* @param status Errors are returned here.
271
* @return Pointer to the UText. If a UText was supplied as input, this
272
* will always be used and returned.
275
U_STABLE UText * U_EXPORT2
276
utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
280
* Open a writable UText implementation for an ICU Replaceable object.
281
* @param ut Pointer to a UText struct. If NULL, a new UText will be created.
282
* If non-NULL, must refer to an already existing UText, which will then
283
* be reset to reference the specified replaceable text.
284
* @param rep A Replaceable text object.
285
* @param status Errors are returned here.
286
* @return Pointer to the UText. If a UText was supplied as input, this
287
* will always be used and returned.
291
U_STABLE UText * U_EXPORT2
292
utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status);
295
* Open a UText implementation over an ICU CharacterIterator.
296
* @param ut Pointer to a UText struct. If NULL, a new UText will be created.
297
* If non-NULL, must refer to an already existing UText, which will then
298
* be reset to reference the specified character iterator.
299
* @param ic A Character Iterator.
300
* @param status Errors are returned here.
301
* @return Pointer to the UText. If a UText was supplied as input, this
302
* will always be used and returned.
306
U_STABLE UText * U_EXPORT2
307
utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status);
313
* Clone a UText. This is much like opening a UText where the source text is itself
316
* A deep clone will copy both the UText data structures and the underlying text.
317
* The original and cloned UText will operate completely independently; modifications
318
* made to the text in one will not affect the other. Text providers are not
319
* required to support deep clones. The user of clone() must check the status return
320
* and be prepared to handle failures.
322
* The standard UText implementations for UTF8, UChar *, UnicodeString and
323
* Replaceable all support deep cloning.
325
* The UText returned from a deep clone will be writable, assuming that the text
326
* provider is able to support writing, even if the source UText had been made
327
* non-writable by means of utext_freeze().
329
* A shallow clone replicates only the UText data structures; it does not make
330
* a copy of the underlying text. Shallow clones can be used as an efficient way to
331
* have multiple iterators active in a single text string that is not being
334
* A shallow clone operation will not fail, barring truly exceptional conditions such
335
* as memory allocation failures.
337
* Shallow UText clones should be avoided if the UText functions that modify the
338
* text are expected to be used, either on the original or the cloned UText.
339
* Any such modifications can cause unpredictable behavior. Read Only
340
* shallow clones provide some protection against errors of this type by
341
* disabling text modification via the cloned UText.
343
* A shallow clone made with the readOnly parameter == FALSE will preserve the
344
* utext_isWritable() state of the source object. Note, however, that
345
* write operations must be avoided while more than one UText exists that refer
346
* to the same underlying text.
348
* A UText and its clone may be safely concurrently accessed by separate threads.
349
* This is true for read access only with shallow clones, and for both read and
350
* write access with deep clones.
351
* It is the responsibility of the Text Provider to ensure that this thread safety
354
* @param dest A UText struct to be filled in with the result of the clone operation,
355
* or NULL if the clone function should heap-allocate a new UText struct.
356
* If non-NULL, must refer to an already existing UText, which will then
357
* be reset to become the clone.
358
* @param src The UText to be cloned.
359
* @param deep TRUE to request a deep clone, FALSE for a shallow clone.
360
* @param readOnly TRUE to request that the cloned UText have read only access to the
363
* @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
364
* will be returned if the text provider is unable to clone the
366
* @return The newly created clone, or NULL if the clone operation failed.
369
U_STABLE UText * U_EXPORT2
370
utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
374
* Compare two UText objects for equality.
375
* UTexts are equal if they are iterating over the same text, and
376
* have the same iteration position within the text.
377
* If either or both of the parameters are NULL, the comparison is FALSE.
379
* @param a The first of the two UTexts to compare.
380
* @param b The other UText to be compared.
381
* @return TRUE if the two UTexts are equal.
384
U_STABLE UBool U_EXPORT2
385
utext_equals(const UText *a, const UText *b);
388
/*****************************************************************************
390
* Functions to work with the text represented by a UText wrapper
392
*****************************************************************************/
395
* Get the length of the text. Depending on the characteristics
396
* of the underlying text representation, this may be expensive.
397
* @see utext_isLengthExpensive()
400
* @param ut the text to be accessed.
401
* @return the length of the text, expressed in native units.
405
U_STABLE int64_t U_EXPORT2
406
utext_nativeLength(UText *ut);
409
* Return TRUE if calculating the length of the text could be expensive.
410
* Finding the length of NUL terminated strings is considered to be expensive.
412
* Note that the value of this function may change
413
* as the result of other operations on a UText.
414
* Once the length of a string has been discovered, it will no longer
415
* be expensive to report it.
417
* @param ut the text to be accessed.
418
* @return TRUE if determining the length of the text could be time consuming.
421
U_STABLE UBool U_EXPORT2
422
utext_isLengthExpensive(const UText *ut);
425
* Returns the code point at the requested index,
426
* or U_SENTINEL (-1) if it is out of bounds.
428
* If the specified index points to the interior of a multi-unit
429
* character - one of the trail bytes of a UTF-8 sequence, for example -
430
* the complete code point will be returned.
432
* The iteration position will be set to the start of the returned code point.
434
* This function is roughly equivalent to the sequence
435
* utext_setNativeIndex(index);
437
* (There is a subtle difference if the index is out of bounds by being less than zero -
438
* utext_setNativeIndex(negative value) sets the index to zero, after which utext_current()
439
* will return the char at zero. utext_char32At(negative index), on the other hand, will
440
* return the U_SENTINEL value of -1.)
442
* @param ut the text to be accessed
443
* @param nativeIndex the native index of the character to be accessed. If the index points
444
* to other than the first unit of a multi-unit character, it will be adjusted
445
* to the start of the character.
446
* @return the code point at the specified index.
449
U_STABLE UChar32 U_EXPORT2
450
utext_char32At(UText *ut, int64_t nativeIndex);
455
* Get the code point at the current iteration position,
456
* or U_SENTINEL (-1) if the iteration has reached the end of
459
* @param ut the text to be accessed.
460
* @return the Unicode code point at the current iterator position.
463
U_STABLE UChar32 U_EXPORT2
464
utext_current32(UText *ut);
468
* Get the code point at the current iteration position of the UText, and
469
* advance the position to the first index following the character.
471
* If the position is at the end of the text (the index following
472
* the last character, which is also the length of the text),
473
* return U_SENTINEL (-1) and do not advance the index.
475
* This is a post-increment operation.
477
* An inline macro version of this function, UTEXT_NEXT32(),
478
* is available for performance critical use.
480
* @param ut the text to be accessed.
481
* @return the Unicode code point at the iteration position.
485
U_STABLE UChar32 U_EXPORT2
486
utext_next32(UText *ut);
490
* Move the iterator position to the character (code point) whose
491
* index precedes the current position, and return that character.
492
* This is a pre-decrement operation.
494
* If the initial position is at the start of the text (index of 0)
495
* return U_SENTINEL (-1), and leave the position unchanged.
497
* An inline macro version of this function, UTEXT_PREVIOUS32(),
498
* is available for performance critical use.
500
* @param ut the text to be accessed.
501
* @return the previous UChar32 code point, or U_SENTINEL (-1)
502
* if the iteration has reached the start of the text.
503
* @see UTEXT_PREVIOUS32
506
U_STABLE UChar32 U_EXPORT2
507
utext_previous32(UText *ut);
511
* Set the iteration index and return the code point at that index.
512
* Leave the iteration index at the start of the following code point.
514
* This function is the most efficient and convenient way to
515
* begin a forward iteration. The results are identical to those
522
* @param ut the text to be accessed.
523
* @param nativeIndex Iteration index, in the native units of the text provider.
524
* @return Code point which starts at or before index,
525
* or U_SENTINEL (-1) if it is out of bounds.
528
U_STABLE UChar32 U_EXPORT2
529
utext_next32From(UText *ut, int64_t nativeIndex);
534
* Set the iteration index, and return the code point preceding the
535
* one specified by the initial index. Leave the iteration position
536
* at the start of the returned code point.
538
* This function is the most efficient and convenient way to
539
* begin a backwards iteration.
541
* @param ut the text to be accessed.
542
* @param nativeIndex Iteration index in the native units of the text provider.
543
* @return Code point preceding the one at the initial index,
544
* or U_SENTINEL (-1) if it is out of bounds.
548
U_STABLE UChar32 U_EXPORT2
549
utext_previous32From(UText *ut, int64_t nativeIndex);
552
* Get the current iterator position, which can range from 0 to
553
* the length of the text.
554
* The position is a native index into the input text, in whatever format it
555
* may have (possibly UTF-8 for example), and may not always be the same as
556
* the corresponding UChar (UTF-16) index.
557
* The returned position will always be aligned to a code point boundary.
559
* @param ut the text to be accessed.
560
* @return the current index position, in the native units of the text provider.
563
U_STABLE int64_t U_EXPORT2
564
utext_getNativeIndex(const UText *ut);
567
* Set the current iteration position to the nearest code point
568
* boundary at or preceding the specified index.
569
* The index is in the native units of the original input text.
570
* If the index is out of range, it will be pinned to be within
571
* the range of the input text.
573
* It will usually be more efficient to begin an iteration
574
* using the functions utext_next32From() or utext_previous32From()
575
* rather than utext_setNativeIndex().
577
* Moving the index position to an adjacent character is best done
578
* with utext_next32(), utext_previous32() or utext_moveIndex32().
579
* Attempting to do direct arithmetic on the index position is
580
* complicated by the fact that the size (in native units) of a
581
* character depends on the underlying representation of the character
582
* (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
585
* @param ut the text to be accessed.
586
* @param nativeIndex the native unit index of the new iteration position.
589
U_STABLE void U_EXPORT2
590
utext_setNativeIndex(UText *ut, int64_t nativeIndex);
593
* Move the iterator position by delta code points. The number of code points
594
* is a signed number; a negative delta will move the iterator backwards,
595
* towards the start of the text.
597
* The index is moved by <code>delta</code> code points
598
* forward or backward, but no further backward than to 0 and
599
* no further forward than to utext_nativeLength().
600
* The resulting index value will be in between 0 and length, inclusive.
602
* @param ut the text to be accessed.
603
* @param delta the signed number of code points to move the iteration position.
604
* @return TRUE if the position could be moved the requested number of positions while
605
* staying within the range [0 - text length].
608
U_STABLE UBool U_EXPORT2
609
utext_moveIndex32(UText *ut, int32_t delta);
612
* Get the native index of the character preceding the current position.
613
* If the iteration position is already at the start of the text, zero
615
* The value returned is the same as that obtained from the following sequence,
616
* but without the side effect of changing the iteration position.
619
* UText *ut = whatever;
622
* utext_getNativeIndex(ut);
625
* This function is most useful during forwards iteration, where it will get the
626
* native index of the character most recently returned from utext_next32().
628
* @param ut the text to be accessed
629
* @return the native index of the character preceding the current index position,
630
* or zero if the current position is at the start of the text.
633
U_STABLE int64_t U_EXPORT2
634
utext_getPreviousNativeIndex(UText *ut);
639
* Extract text from a UText into a UChar buffer. The range of text to be extracted
640
* is specified in the native indices of the UText provider. These may not necessarily
643
* The size (number of 16 bit UChars) of the data to be extracted is returned. The
644
* full number of UChars is returned, even when the extracted text is truncated
645
* because the specified buffer size is too small.
647
* The extracted string will (if you are a user) / must (if you are a text provider)
648
* be NUL-terminated if there is sufficient space in the destination buffer. This
649
* terminating NUL is not included in the returned length.
651
* The iteration index is left at the position following the last extracted character.
653
* @param ut the UText from which to extract data.
654
* @param nativeStart the native index of the first character to extract.
655
* If the specified index is out of range,
656
* it will be pinned to be within 0 <= index <= textLength
657
* @param nativeLimit the native string index of the position following the last
658
* character to extract. If the specified index is out of range,
659
* it will be pinned to be within 0 <= index <= textLength.
660
* nativeLimit must be >= nativeStart.
661
* @param dest the UChar (UTF-16) buffer into which the extracted text is placed
662
* @param destCapacity The size, in UChars, of the destination buffer. May be zero
663
* for precomputing the required size.
664
* @param status receives any error status.
665
* U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
666
* buffer was too small. Returns number of UChars for preflighting.
667
* @return Number of UChars in the data to be extracted. Does not include a trailing NUL.
671
U_STABLE int32_t U_EXPORT2
672
utext_extract(UText *ut,
673
int64_t nativeStart, int64_t nativeLimit,
674
UChar *dest, int32_t destCapacity,
679
* Compare two UTexts (binary order). The comparison begins at each source text's
680
* iteration position. The iteration position of each UText will be left following
681
* the last character compared.
683
* The comparison is done in code point order; unlike u_strCompare, you
684
* cannot choose to use code unit order. This is because the characters
685
* in a UText are accessed one code point at a time, and may not be from a UTF-16
688
* This function works with strings of different explicitly specified lengths
689
* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
690
* A length argument of -1 signifies that as much of the string should be used as
691
* is necessary to compare with the other string. If both length arguments are -1,
692
* the entire remaining portions of both strings are used.
694
* @param s1 First source string.
695
* @param length1 Length of first source string in UTF-32 code points.
697
* @param s2 Second source string.
698
* @param length2 Length of second source string in UTF-32 code points.
700
* @return <0 or 0 or >0 as usual for string comparisons
702
* @internal ICU 4.4 technology preview
704
U_INTERNAL int32_t U_EXPORT2
705
utext_compare(UText *s1, int32_t length1,
706
UText *s2, int32_t length2);
709
* Compare two UTexts (binary order). The comparison begins at each source text's
710
* iteration position. The iteration position of each UText will be left following
711
* the last character compared. This method differs from utext_compare in that
712
* it accepts native limits rather than lengths for each string.
714
* The comparison is done in code point order; unlike u_strCompare, you
715
* cannot choose to use code unit order. This is because the characters
716
* in a UText are accessed one code point at a time, and may not be from a UTF-16
719
* This function works with strings of different explicitly specified lengths
720
* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
721
* A limit argument of -1 signifies that as much of the string should be used as
722
* is necessary to compare with the other string. If both limit arguments are -1,
723
* the entire remaining portions of both strings are used.
725
* @param s1 First source string.
726
* @param limit1 Native index of the last character in the first source string to be considered.
728
* @param s2 Second source string.
729
* @param limit2 Native index of the last character in the second source string to be considered.
731
* @return <0 or 0 or >0 as usual for string comparisons
733
* @internal ICU 4.4 technology preview
735
U_INTERNAL int32_t U_EXPORT2
736
utext_compareNativeLimit(UText *s1, int64_t limit1,
737
UText *s2, int64_t limit2);
740
* Compare two UTexts case-insensitively using full case folding. The comparison
741
* begins at each source text's iteration position. The iteration position of each
742
* UText will be left following the last character compared.
744
* The comparison is done in code point order; this is because the characters
745
* in a UText are accessed one code point at a time, and may not be from a UTF-16
748
* This function works with strings of different explicitly specified lengths
749
* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
750
* A length argument of -1 signifies that as much of the string should be used as
751
* is necessary to compare with the other string. If both length arguments are -1,
752
* the entire remaining portions of both strings are used.
754
* @param s1 First source string.
755
* @param length1 Length of first source string in UTF-32 code points.
757
* @param s2 Second source string.
758
* @param length2 Length of second source string in UTF-32 code points.
760
* @param options A bit set of options:
761
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
762
* Comparison in code point order with default case folding.
764
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
766
* @param pErrorCode Must be a valid pointer to an error code value,
767
* which must not indicate a failure before the function call.
769
* @return <0 or 0 or >0 as usual for string comparisons
771
* @internal ICU 4.4 technology preview
773
U_INTERNAL int32_t U_EXPORT2
774
utext_caseCompare(UText *s1, int32_t length1,
775
UText *s2, int32_t length2,
776
uint32_t options, UErrorCode *pErrorCode);
779
* Compare two UTexts case-insensitively using full case folding. The comparison
780
* begins at each source text's iteration position. The iteration position of each
781
* UText will be left following the last character compared. This method differs from
782
* utext_caseCompare in that it accepts native limits rather than lengths for each
785
* The comparison is done in code point order; this is because the characters
786
* in a UText are accessed one code point at a time, and may not be from a UTF-16
789
* This function works with strings of different explicitly specified lengths
790
* unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
791
* A limit argument of -1 signifies that as much of the string should be used as
792
* is necessary to compare with the other string. If both limit arguments are -1,
793
* the entire remaining portions of both strings are used.
795
* @param s1 First source string.
796
* @param limit1 Native index of the last character in the first source string to be considered.
798
* @param s2 Second source string.
799
* @param limit2 Native index of the last character in the second source string to be considered.
801
* @param options A bit set of options:
802
* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
803
* Comparison in code point order with default case folding.
805
* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
807
* @param pErrorCode Must be a valid pointer to an error code value,
808
* which must not indicate a failure before the function call.
810
* @return <0 or 0 or >0 as usual for string comparisons
812
* @internal ICU 4.4 technology preview
814
U_INTERNAL int32_t U_EXPORT2
815
utext_caseCompareNativeLimit(UText *s1, int64_t limit1,
816
UText *s2, int64_t limit2,
817
uint32_t options, UErrorCode *pErrorCode);
820
/************************************************************************************
822
* #define inline versions of selected performance-critical text access functions
823
* Caution: do not use auto increment++ or decrement-- expressions
824
* as parameters to these macros.
826
* For most use, where there is no extreme performance constraint, the
827
* normal, non-inline functions are a better choice. The resulting code
828
* will be smaller, and, if the need ever arises, easier to debug.
830
* These are implemented as #defines rather than real functions
831
* because there is no fully portable way to do inline functions in plain C.
833
************************************************************************************/
836
* inline version of utext_current32(), for performance-critical situations.
838
* Get the code point at the current iteration position of the UText.
839
* Returns U_SENTINEL (-1) if the position is at the end of the
842
* @internal ICU 4.4 technology preview
844
/*
 * Fast path for utext_current32(). When the iteration position lies inside the
 * current chunk and the UTF-16 code unit at that position is below 0xD800,
 * the macro yields that code unit directly; every other case is delegated to
 * utext_current32(). The fast path does not move the iteration position.
 * `ut` is evaluated several times: do not pass an expression with side effects.
 */
#define UTEXT_CURRENT32(ut)  \
    ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset] < 0xd800 ? \
     ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
849
* inline version of utext_next32(), for performance-critical situations.
851
* Get the code point at the current iteration position of the UText, and
852
* advance the position to the first index following the character.
853
* This is a post-increment operation.
854
* Returns U_SENTINEL (-1) if the position is at the end of the
859
/*
 * Fast path for utext_next32(). When the iteration position lies inside the
 * current chunk and the UTF-16 code unit there is below 0xD800, the macro
 * yields that code unit and post-increments chunkOffset; every other case is
 * delegated to utext_next32().
 * `ut` is evaluated several times: do not pass an expression with side effects.
 */
#define UTEXT_NEXT32(ut)  \
    ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset] < 0xd800 ? \
     ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
864
* inline version of utext_previous32(), for performance-critical situations.
866
* Move the iterator position to the character (code point) whose
867
* index precedes the current position, and return that character.
868
* This is a pre-decrement operation.
869
* Returns U_SENTINEL (-1) if the position is at the start of the text.
873
/*
 * Fast path for utext_previous32(). When the iteration position is not at the
 * start of the current chunk and the UTF-16 code unit just before it is below
 * 0xD800, the macro pre-decrements chunkOffset and yields that code unit;
 * every other case is delegated to utext_previous32().
 * `ut` is evaluated several times: do not pass an expression with side effects.
 */
#define UTEXT_PREVIOUS32(ut)  \
    ((ut)->chunkOffset > 0 && \
     (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
     (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
879
* inline version of utext_getNativeIndex(), for performance-critical situations.
881
* Get the current iterator position, which can range from 0 to
882
* the length of the text.
883
* The position is a native index into the input text, in whatever format it
884
* may have (possibly UTF-8 for example), and may not always be the same as
885
* the corresponding UChar (UTF-16) index.
886
* The returned position will always be aligned to a code point boundary.
890
/*
 * Fast path for utext_getNativeIndex(). While chunkOffset does not exceed
 * nativeIndexingLimit, the native index is chunkNativeStart + chunkOffset;
 * beyond that limit the provider's mapOffsetToNative() function is called
 * through the pFuncs dispatch table.
 * `ut` is evaluated several times: do not pass an expression with side effects.
 */
#define UTEXT_GETNATIVEINDEX(ut)                       \
    ((ut)->chunkOffset <= (ut)->nativeIndexingLimit ?   \
        (ut)->chunkNativeStart + (ut)->chunkOffset :    \
        (ut)->pFuncs->mapOffsetToNative(ut))
896
* inline version of utext_setNativeIndex(), for performance-critical situations.
898
* Set the current iteration position to the nearest code point
899
* boundary at or preceding the specified index.
900
* The index is in the native units of the original input text.
901
* If the index is out of range, it will be pinned to be within
902
* the range of the input text.
906
/*
 * Fast path for utext_setNativeIndex(). If the requested native index falls
 * inside the part of the current chunk where native and chunk offsets
 * correspond (0 <= ix - chunkNativeStart <= nativeIndexingLimit), only
 * chunkOffset is updated. Otherwise the request is handed to
 * utext_setNativeIndex().
 *
 * The two paths are mutually exclusive: without the `else`, an index outside
 * the chunk would be silently ignored and an index inside it would still pay
 * for the function call.
 *
 * Expands to a brace-enclosed statement block, not an expression.
 * `ut` and `ix` are evaluated more than once: do not pass expressions with
 * side effects.
 */
#define UTEXT_SETNATIVEINDEX(ut, ix)                       \
    { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
      if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
          (ut)->chunkOffset=(int32_t)__offset; \
      } else { \
          utext_setNativeIndex((ut), (ix)); } }
915
/************************************************************************************
917
* Functions related to writing or modifying the text.
918
* These will work only with modifiable UTexts. Attempting to
919
* modify a read-only UText will return an error status.
921
************************************************************************************/
925
* Return TRUE if the text can be written (modified) with utext_replace() or
926
* utext_copy(). For the text to be writable, the text provider must
927
* be of a type that supports writing and the UText must not be frozen.
929
* Attempting to modify text when utext_isWritable() is FALSE will fail -
930
* the text will not be modified, and an error will be returned from the function
931
* that attempted the modification.
933
* @param ut the UText to be tested.
934
* @return TRUE if the text is modifiable.
936
* @see utext_freeze()
937
* @see utext_replace()
942
U_STABLE UBool U_EXPORT2
943
utext_isWritable(const UText *ut);
947
* Test whether there is meta data associated with the text.
948
* @see Replaceable::hasMetaData()
950
* @param ut The UText to be tested
951
* @return TRUE if the underlying text includes meta data.
954
U_STABLE UBool U_EXPORT2
955
utext_hasMetaData(const UText *ut);
959
* Replace a range of the original text with a replacement text.
961
* Leaves the current iteration position at the position following the
962
* newly inserted replacement text.
964
* This function is only available on UText types that support writing,
965
* that is, ones where utext_isWritable() returns TRUE.
967
* When using this function, there should be only a single UText opened onto the
968
* underlying native text string. Behavior after a replace operation
969
* on a UText is undefined for any other additional UTexts that refer to the
972
* @param ut the UText representing the text to be operated on.
973
* @param nativeStart the native index of the start of the region to be replaced
974
* @param nativeLimit the native index of the character following the region to be replaced.
975
* @param replacementText pointer to the replacement text
976
* @param replacementLength length of the replacement text, or -1 if the text is NUL terminated.
977
* @param status receives any error status. Possible errors include
978
* U_NO_WRITE_PERMISSION
980
* @return The signed number of (native) storage units by which
981
* the length of the text expanded or contracted.
985
U_STABLE int32_t U_EXPORT2
986
utext_replace(UText *ut,
987
int64_t nativeStart, int64_t nativeLimit,
988
const UChar *replacementText, int32_t replacementLength,
995
* Copy or move a substring from one position to another within the text,
996
* while retaining any metadata associated with the text.
997
* This function is used to duplicate or reorder substrings.
998
* The destination index must not overlap the source range.
1000
* The text to be copied or moved is inserted at destIndex;
1001
* it does not replace or overwrite any existing text.
1003
* The iteration position is left following the newly inserted text
1004
* at the destination position.
1006
* This function is only available on UText types that support writing,
1007
* that is, ones where utext_isWritable() returns TRUE.
1009
* When using this function, there should be only a single UText opened onto the
1010
* underlying native text string. Behavior after a copy operation
1011
* on a UText is undefined for any other additional UTexts that refer to the
1014
* @param ut The UText representing the text to be operated on.
1015
* @param nativeStart The native index of the start of the region to be copied or moved
1016
* @param nativeLimit The native index of the character position following the region
1018
* @param destIndex The native destination index to which the source substring is
1020
* @param move If TRUE, then the substring is moved, not copied/duplicated.
1021
* @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
1025
U_STABLE void U_EXPORT2
1026
utext_copy(UText *ut,
1027
int64_t nativeStart, int64_t nativeLimit,
1030
UErrorCode *status);
1035
* Freeze a UText. This prevents any modification to the underlying text itself
1036
* by means of functions operating on this UText.
1039
* Once frozen, a UText can not be unfrozen. The intent is to ensure
1040
* that the text underlying a frozen UText wrapper cannot be modified via that UText.
1043
* Caution: freezing a UText will disable changes made via the specific
1044
* frozen UText wrapper only; it will not have any effect on the ability to
1045
* directly modify the text by bypassing the UText. Any such backdoor modifications
1046
* are always an error while UText access is occurring because the underlying
1047
* text can get out of sync with UText's buffering.
1050
* @param ut The UText to be frozen.
1051
* @see utext_isWritable()
1054
U_STABLE void U_EXPORT2
1055
utext_freeze(UText *ut);
1059
* UText provider properties (bit field indexes).
1066
* It is potentially time consuming for the provider to determine the length of the text.
1069
UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
1071
* Text chunks remain valid and usable until the text object is modified or
1072
* deleted, not just until the next time the access() function is called
1073
* (which is the default).
1076
UTEXT_PROVIDER_STABLE_CHUNKS = 2,
1078
* The provider supports modifying the text via the replace() and copy()
1083
UTEXT_PROVIDER_WRITABLE = 3,
1085
* There is meta data associated with the text.
1086
* @see Replaceable::hasMetaData()
1089
UTEXT_PROVIDER_HAS_META_DATA = 4,
1091
* Text provider owns the text storage.
1092
* Generally occurs as the result of a deep clone of the UText.
1093
* When closing the UText, the associated text must
1094
* also be closed, deleted or freed, whichever is appropriate.
1097
UTEXT_PROVIDER_OWNS_TEXT = 5
1101
* Function type declaration for UText.clone().
1103
* clone a UText. Much like opening a UText where the source text is itself
1106
* A deep clone will copy both the UText data structures and the underlying text.
1107
* The original and cloned UText will operate completely independently; modifications
1108
* made to the text in one will not affect the other. Text providers are not
1109
* required to support deep clones. The user of clone() must check the status return
1110
* and be prepared to handle failures.
1112
* A shallow clone replicates only the UText data structures; it does not make
1113
* a copy of the underlying text. Shallow clones can be used as an efficient way to
1114
* have multiple iterators active in a single text string that is not being
1117
* A shallow clone operation must not fail except for truly exceptional conditions such
1118
* as memory allocation failures.
1120
* A UText and its clone may be safely concurrently accessed by separate threads.
1121
* This is true for both shallow and deep clones.
1122
* It is the responsibility of the Text Provider to ensure that this thread safety
1123
* constraint is met.
1126
* @param dest A UText struct to be filled in with the result of the clone operation,
1127
* or NULL if the clone function should heap-allocate a new UText struct.
1128
* @param src The UText to be cloned.
1129
* @param deep TRUE to request a deep clone, FALSE for a shallow clone.
1130
* @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
1131
* should be returned if the text provider is unable to clone the
1133
* @return The newly created clone, or NULL if the clone operation failed.
1137
typedef UText * U_CALLCONV
1138
UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
1142
* Function type declaration for UText.nativeLength().
1144
* @param ut the UText to get the length of.
1145
* @return the length, in the native units of the original text string.
1149
typedef int64_t U_CALLCONV
1150
UTextNativeLength(UText *ut);
1153
* Function type declaration for UText.access(). Get the description of the text chunk
1154
* containing the text at a requested native index. The UText's iteration
1155
* position will be left at the requested index. If the index is out
1156
* of bounds, the iteration position will be left at the start or end
1157
* of the string, as appropriate.
1159
* Chunks must begin and end on code point boundaries. A single code point
1160
* comprised of multiple storage units must never span a chunk boundary.
1163
* @param ut the UText being accessed.
1164
* @param nativeIndex Requested index of the text to be accessed.
1165
* @param forward If TRUE, then the returned chunk must contain text
1166
* starting from the index, so that start<=index<limit.
1167
* If FALSE, then the returned chunk must contain text
1168
* before the index, so that start<index<=limit.
1169
* @return True if the requested index could be accessed. The chunk
1170
* will contain the requested text.
1171
* False value if a chunk cannot be accessed
1172
* (the requested index is out of bounds).
1177
typedef UBool U_CALLCONV
1178
UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1181
* Function type declaration for UText.extract().
1183
* Extract text from a UText into a UChar buffer. The range of text to be extracted
1184
* is specified in the native indices of the UText provider. These may not necessarily
1185
* be UTF-16 indices.
1187
* The size (number of 16 bit UChars) in the data to be extracted is returned. The
1188
* full amount is returned, even when the specified buffer size is smaller.
1190
* The extracted string will (if you are a user) / must (if you are a text provider)
1191
* be NUL-terminated if there is sufficient space in the destination buffer.
1193
* @param ut the UText from which to extract data.
1194
* @param nativeStart the native index of the first character to extract.
1195
* @param nativeLimit the native string index of the position following the last
1196
* character to extract.
1197
* @param dest the UChar (UTF-16) buffer into which the extracted text is placed
1198
* @param destCapacity The size, in UChars, of the destination buffer. May be zero
1199
* for precomputing the required size.
1200
* @param status receives any error status.
1201
* If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
1203
* @return Number of UChars in the data. Does not include a trailing NUL.
1207
typedef int32_t U_CALLCONV
1208
UTextExtract(UText *ut,
1209
int64_t nativeStart, int64_t nativeLimit,
1210
UChar *dest, int32_t destCapacity,
1211
UErrorCode *status);
1214
* Function type declaration for UText.replace().
1216
* Replace a range of the original text with a replacement text.
1218
* Leaves the current iteration position at the position following the
1219
* newly inserted replacement text.
1221
* This function need only be implemented on UText types that support writing.
1223
* When using this function, there should be only a single UText opened onto the
1224
* underlying native text string. The function is responsible for updating the
1225
* text chunk within the UText to reflect the updated iteration position,
1226
* taking into account any changes to the underlying string's structure caused
1227
* by the replace operation.
1229
* @param ut the UText representing the text to be operated on.
1230
* @param nativeStart the index of the start of the region to be replaced
1231
* @param nativeLimit the index of the character following the region to be replaced.
1232
* @param replacementText pointer to the replacement text
1233
* @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
1234
* @param status receives any error status. Possible errors include
1235
* U_NO_WRITE_PERMISSION
1237
* @return The signed number of (native) storage units by which
1238
* the length of the text expanded or contracted.
1242
typedef int32_t U_CALLCONV
1243
UTextReplace(UText *ut,
1244
int64_t nativeStart, int64_t nativeLimit,
1245
const UChar *replacementText, int32_t replacmentLength,
1246
UErrorCode *status);
1249
* Function type declaration for UText.copy().
1251
* Copy or move a substring from one position to another within the text,
1252
* while retaining any metadata associated with the text.
1253
* This function is used to duplicate or reorder substrings.
1254
* The destination index must not overlap the source range.
1256
* The text to be copied or moved is inserted at destIndex;
1257
* it does not replace or overwrite any existing text.
1259
* This function need only be implemented for UText types that support writing.
1261
* When using this function, there should be only a single UText opened onto the
1262
* underlying native text string. The function is responsible for updating the
1263
* text chunk within the UText to reflect the updated iteration position,
1264
* taking into account any changes to the underlying string's structure caused
1265
* by the copy operation.
1267
* @param ut The UText representing the text to be operated on.
1268
* @param nativeStart The index of the start of the region to be copied or moved
1269
* @param nativeLimit The index of the character following the region to be copied or moved.
1270
* @param nativeDest The destination index to which the source substring is copied or moved.
1271
* @param move If TRUE, then the substring is moved, not copied/duplicated.
1272
* @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
1276
typedef void U_CALLCONV
1277
UTextCopy(UText *ut,
1278
int64_t nativeStart, int64_t nativeLimit,
1281
UErrorCode *status);
1284
* Function type declaration for UText.mapOffsetToNative().
1285
* Map from the current UChar offset within the current text chunk to
1286
* the corresponding native index in the original source text.
1288
* This is required only for text providers that do not use native UTF-16 indexes.
1290
* @param ut the UText.
1291
* @return Absolute (native) index corresponding to chunkOffset in the current chunk.
1292
* The returned native index should always be to a code point boundary.
1296
typedef int64_t U_CALLCONV
1297
UTextMapOffsetToNative(const UText *ut);
1300
* Function type declaration for UText.mapIndexToUTF16().
1301
* Map from a native index to a UChar offset within a text chunk.
1302
* Behavior is undefined if the native index does not fall within the
1305
* This function is required only for text providers that do not use native UTF-16 indexes.
1307
* @param ut The UText containing the text chunk.
1308
* @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
1309
* @return Chunk-relative UTF-16 offset corresponding to the specified native
1314
typedef int32_t U_CALLCONV
1315
UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1319
* Function type declaration for UText.utextClose().
1321
* A Text Provider close function is only required for provider types that make
1322
* allocations in their open function (or other functions) that must be
1323
* cleaned when the UText is closed.
1325
* The allocation of the UText struct itself and any "extra" storage
1326
* associated with the UText is handled by the common UText implementation
1327
* and does not require provider specific cleanup in a close function.
1329
* Most UText provider implementations do not need to implement this function.
1331
* @param ut A UText object to be closed.
1335
typedef void U_CALLCONV
1336
UTextClose(UText *ut);
1340
* (public) Function dispatch table for UText.
1341
* Conceptually very much like a C++ Virtual Function Table.
1342
* This struct defines the organization of the table.
1343
* Each text provider implementation must provide an
1344
* actual table that is initialized with the appropriate functions
1345
* for the type of text being handled.
1350
* (public) Function table size, sizeof(UTextFuncs)
1351
* Intended for use should the table grow to accommodate added
1352
* functions in the future, to allow tests for older format
1353
* function tables that do not contain the extensions.
1355
* Fields are placed for optimal alignment on
1356
* 32/64/128-bit-pointer machines, by normally grouping together
1366
* (private) Alignment padding.
1367
* Do not use, reserved for use by the UText framework only.
1370
int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
1374
* (public) Function pointer for UTextClone
1382
* (public) Function pointer for UTextNativeLength.
1383
* May be expensive to compute!
1388
UTextNativeLength *nativeLength;
1391
* (public) Function pointer for UTextAccess.
1396
UTextAccess *access;
1399
* (public) Function pointer for UTextExtract.
1404
UTextExtract *extract;
1407
* (public) Function pointer for UTextReplace.
1412
UTextReplace *replace;
1415
* (public) Function pointer for UTextCopy.
1423
* (public) Function pointer for UTextMapOffsetToNative.
1425
* @see UTextMapOffsetToNative
1428
UTextMapOffsetToNative *mapOffsetToNative;
1431
* (public) Function pointer for UTextMapNativeIndexToUTF16.
1433
* @see UTextMapNativeIndexToUTF16
1436
UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
1439
* (public) Function pointer for UTextClose.
1447
* (private) Spare function pointer
1453
* (private) Spare function pointer
1459
* (private) Spare function pointer
1466
* Function dispatch table for UText
1469
/* Lets the dispatch-table struct be written as plain UTextFuncs, as in the UText field `const UTextFuncs *pFuncs`. */
typedef struct UTextFuncs UTextFuncs;
1472
* UText struct. Provides the interface between the generic UText access code
1473
* and the UText provider code that works on specific kinds of
1474
* text (UTF-8, noncontiguous UTF-16, whatever.)
1476
* Applications that are using predefined types of text providers
1477
* to pass text data to ICU services will have no need to view the
1478
* internals of the UText structs that they open.
1484
* (private) Magic. Used to help detect when UText functions are handed
1485
* invalid or uninitialized UText structs.
1486
* utext_openXYZ() functions take an initialized,
1487
* but not necessarily open, UText struct as an
1488
* optional fill-in parameter. This magic field
1489
* is used to check for that initialization.
1490
* Text provider close functions must NOT clear
1491
* the magic field because that would prevent
1492
* reuse of the UText struct.
1499
* (private) Flags for managing the allocation and freeing of
1500
* memory associated with this UText.
1507
* Text provider properties. This set of flags is maintained by the
1508
* text provider implementation.
1511
int32_t providerProperties;
1514
* (public) sizeOfStruct=sizeof(UText)
1515
* Allows possible backward compatible extension.
1519
int32_t sizeOfStruct;
1521
/* ------ 16 byte alignment boundary ----------- */
1525
* (protected) Native index of the first character position following
1526
* the current chunk.
1529
int64_t chunkNativeLimit;
1532
* (protected) Size in bytes of the extra space (pExtra).
1538
* (protected) The highest chunk offset where native indexing and
1539
* chunk (UTF-16) indexing correspond. For UTF-16 sources, value
1540
* will be equal to chunkLength.
1544
int32_t nativeIndexingLimit;
1546
/* ---- 16 byte alignment boundary------ */
1549
* (protected) Native index of the first character in the text chunk.
1552
int64_t chunkNativeStart;
1555
* (protected) Current iteration position within the text chunk (UTF-16 buffer).
1556
* This is the index to the character that will be returned by utext_next32().
1559
int32_t chunkOffset;
1562
* (protected) Length of the text chunk (UTF-16 buffer), in UChars.
1565
int32_t chunkLength;
1567
/* ---- 16 byte alignment boundary-- */
1571
* (protected) pointer to a chunk of text in UTF-16 format.
1572
* May refer either to original storage of the source of the text, or
1573
* if conversion was required, to a buffer owned by the UText.
1576
const UChar *chunkContents;
1579
* (public) Pointer to Dispatch table for accessing functions for this UText.
1582
const UTextFuncs *pFuncs;
1585
* (protected) Pointer to additional space requested by the
1586
* text provider during the utext_open operation.
1592
* (protected) Pointer to string or text-containing object or similar.
1593
* This is the source of the text that this UText is wrapping, in a format
1594
* that is known to the text provider functions.
1597
const void *context;
1599
/* --- 16 byte alignment boundary--- */
1602
* (protected) Pointer fields available for use by the text provider.
1603
* Not used by UText common code.
1608
* (protected) Pointer fields available for use by the text provider.
1609
* Not used by UText common code.
1614
* (protected) Pointer fields available for use by the text provider.
1615
* Not used by UText common code.
1621
* Private field reserved for future use by the UText framework
1622
* itself. This is not to be touched by the text providers.
1628
/* --- 16 byte alignment boundary--- */
1632
* (protected) Integer field reserved for use by the text provider.
1633
* Not used by the UText framework, or by the client (user) of the UText.
1639
* (protected) Integer field reserved for use by the text provider.
1640
* Not used by the UText framework, or by the client (user) of the UText.
1646
* (protected) Integer field reserved for use by the text provider.
1647
* Not used by the UText framework, or by the client (user) of the UText.
1652
/* ---- 16 byte alignment boundary---- */
1656
* Private field reserved for future use by the UText framework
1657
* itself. This is not to be touched by the text providers.
1662
* Private field reserved for future use by the UText framework
1663
* itself. This is not to be touched by the text providers.
1668
* Private field reserved for future use by the UText framework
1669
* itself. This is not to be touched by the text providers.
1677
* Common function for use by Text Provider implementations to allocate and/or initialize
1678
* a new UText struct. To be called in the implementation of utext_open() functions.
1679
* If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
1680
* If the supplied UText is already open, the provider's close function will be called
1681
* so that the struct can be reused by the open that is in progress.
1683
* @param ut pointer to a UText struct to be re-used, or null if a new UText
1684
* should be allocated.
1685
* @param extraSpace The amount of additional space to be allocated as part
1686
* of this UText, for use by types of providers that require
1687
* additional storage.
1688
* @param status Errors are returned here.
1689
* @return pointer to the UText, allocated if necessary, with extra space set up if requested.
1692
/*
 * Common helper for text provider open functions: allocate a UText on the
 * heap when ut is NULL, otherwise (re)initialize the supplied struct, calling
 * the provider's close function first if it is already open. extraSpace is
 * the amount of additional provider storage to allocate as part of the UText.
 * Returns the UText; errors are reported through status.
 */
U_STABLE UText * U_EXPORT2
utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1697
* Value used to help identify correctly initialized UText structs.
1698
* Note: must be publicly visible so that UTEXT_INITIALIZER can access it.
1701
UTEXT_MAGIC = 0x345ad82c
1705
* initializer to be used with local (stack) instances of a UText
1706
* struct. UText structs must be initialized before passing
1707
* them to one of the utext_open functions.
1711
#define UTEXT_INITIALIZER { \
1712
UTEXT_MAGIC, /* magic */ \
1714
0, /* providerProps */ \
1715
sizeof(UText), /* sizeOfStruct */ \
1716
0, /* chunkNativeLimit */ \
1717
0, /* extraSize */ \
1718
0, /* nativeIndexingLimit */ \
1719
0, /* chunkNativeStart */ \
1720
0, /* chunkOffset */ \
1721
0, /* chunkLength */ \
1722
NULL, /* chunkContents */ \
1723
NULL, /* pFuncs */ \
1724
NULL, /* pExtra */ \
1725
NULL, /* context */ \
1726
NULL, NULL, NULL, /* p, q, r */ \
1728
0, 0, 0, /* a, b, c */ \
1729
0, 0, 0 /* privA,B,C, */ \