2
**********************************************************************
3
* Copyright (C) 2001-2010 IBM and others. All rights reserved.
4
**********************************************************************
5
* Date Name Description
6
* 08/13/2001 synwee Creation.
7
**********************************************************************
12
#include "unicode/utypes.h"
14
#if !UCONFIG_NO_COLLATION
16
#include "unicode/normalizer2.h"
17
#include "unicode/ucol.h"
18
#include "unicode/ucoleitr.h"
19
#include "unicode/ubrk.h"
21
#define INITIAL_ARRAY_SIZE_ 256
22
#define MAX_TABLE_SIZE_ 257
25
// required since collation element iterator does not have a getText API
27
int32_t textLength; // exact length
29
UBool isCanonicalMatch;
30
int16_t elementComparisonType;
31
UBreakIterator *internalBreakIter; //internal character breakiterator
32
UBreakIterator *breakIter;
33
// value USEARCH_DONE is the default value
34
// if we are not at the start of the text or the end of the text,
35
// depending on the iteration direction and matchedIndex is USEARCH_DONE
36
// it means that we can't find any more matches in that particular direction
38
int32_t matchedLength;
39
UBool isForwardSearching;
45
int32_t textLength; // exact length
46
// length required for backwards ce comparison
49
int32_t CEBuffer[INITIAL_ARRAY_SIZE_];
52
int64_t PCEBuffer[INITIAL_ARRAY_SIZE_];
53
UBool hasPrefixAccents;
54
UBool hasSuffixAccents;
55
int16_t defaultShiftSize;
56
int16_t shift[MAX_TABLE_SIZE_];
57
int16_t backShift[MAX_TABLE_SIZE_];
60
struct UStringSearch {
61
struct USearch *search;
62
struct UPattern pattern;
63
const UCollator *collator;
64
const U_NAMESPACE_QUALIFIER Normalizer2 *nfd;
65
// positions within the collation element iterator are used to determine
66
// if we are at the start of the text.
67
UCollationElements *textIter;
68
// utility collation element, used throughout program for temporary
70
UCollationElements *utilIter;
72
UCollationStrength strength;
76
UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
77
UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
81
* Exact matches without checking for the ends for extra accents.
82
* The match after the position within the collation element iterator is to be
84
* After a match is found the offset in the collation element iterator will be
85
* shifted to the start of the match.
86
* Implementation note:
87
* For tertiary we can't use the collator->tertiaryMask, that is a
88
* preprocessed mask that takes into account case options. since we are only
89
* concerned with exact matches, we don't need that.
90
* Alternate handling - since only the 16 most significant digits are used,
91
* we can safely do a compare without masking if the ce is a variable, we mask
92
* and get only the primary values no shifting to quaternary is required since
93
* all primary values less than variabletop will need to be masked off anyway.
94
* If the end character is composite and the pattern ce does not match the text
95
* ce, we skip it until we find a match in the end composite character or when
96
* it has passed the character. This is so that we can match pattern "a" with
98
* @param strsrch string search data
99
* @param status error status if any
100
* @return TRUE if an exact match is found, FALSE otherwise
103
// Forward exact-match handler: looks for the next exact match after the
// current position of the collation element iterator, without checking the
// ends of the match for extra accents. Returns TRUE if an exact match is
// found, FALSE otherwise; any error is reported through *status.
UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
107
* According to the definition, matches found here will include the whole span
108
* of beginning and ending accents if it overlaps that region.
109
* @param strsrch string search data
110
* @param status error status if any
111
* @return TRUE if a canonical match is found, FALSE otherwise
114
// Forward canonical-match handler: a match reported here includes the whole
// span of beginning and ending accents when it overlaps that region.
// Returns TRUE if a canonical match is found, FALSE otherwise; any error is
// reported through *status.
UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
117
* Gets the previous match.
118
* Comments follow from handleNextExact
119
* @param strsrch string search data
120
* @param status error status if any
121
* @return TRUE if an exact match is found, FALSE otherwise.
124
// Backward exact-match handler: gets the previous match, following the same
// rules described for usearch_handleNextExact. Returns TRUE if an exact match
// is found, FALSE otherwise; any error is reported through *status.
UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
128
* According to the definition, matches found here will include the whole span
129
* of beginning and ending accents if it overlaps that region.
130
* @param strsrch string search data
131
* @param status error status if any
132
* @return TRUE if a canonical match is found, FALSE otherwise
135
UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
138
#endif /* #if !UCONFIG_NO_COLLATION */