2
**********************************************************************
3
* Copyright (C) 2001 IBM and others. All rights reserved.
4
**********************************************************************
5
* Date Name Description
6
* 08/13/2001 synwee Creation.
7
**********************************************************************
12
#include "unicode/utypes.h"
13
#include "unicode/ucol.h"
14
#include "unicode/ucoleitr.h"
15
#include "unicode/ubrk.h"
17
#define INITIAL_ARRAY_SIZE_ 256
18
#define MAX_TABLE_SIZE_ 257
21
// required since collation element iterator does not have a getText API
23
int32_t textLength; // exact length
25
UBool isCanonicalMatch;
26
UBreakIterator *breakIter;
27
// value USEARCH_DONE is the default value
28
// if we are not at the start of the text or the end of the text,
29
// depending on the iteration direction and matchedIndex is USEARCH_DONE
30
// it means that we cannot find any more matches in that particular direction
32
int32_t matchedLength;
33
UBool isForwardSearching;
39
int32_t textLength; // exact length
40
// length required for backwards ce comparison
43
uint32_t CEBuffer[INITIAL_ARRAY_SIZE_];
44
UBool hasPrefixAccents;
45
UBool hasSuffixAccents;
46
int16_t defaultShiftSize;
47
int16_t shift[MAX_TABLE_SIZE_];
48
int16_t backShift[MAX_TABLE_SIZE_];
51
struct UStringSearch {
52
struct USearch *search;
53
struct UPattern pattern;
54
const UCollator *collator;
55
// positions within the collation element iterator are used to determine
56
// if we are at the start of the text.
57
UCollationElements *textIter;
58
// utility collation element, used throughout program for temporary
60
UCollationElements *utilIter;
63
UCollationStrength strength;
67
UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
68
UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
72
* Exact matches without checking for the ends for extra accents.
73
* The match after the position within the collation element iterator is to be
75
* After a match is found the offset in the collation element iterator will be
76
* shifted to the start of the match.
77
* Implementation note:
78
* For tertiary we can't use the collator->tertiaryMask, that is a
79
* preprocessed mask that takes into account case options. Since we are only
80
* concerned with exact matches, we don't need that.
81
* Alternate handling - since only the 16 most significant bits are used,
82
* we can safely do a compare without masking. If the ce is a variable, we mask
83
* and get only the primary values; no shifting to quaternary is required since
84
* all primary values less than variabletop will need to be masked off anyway.
85
* If the end character is composite and the pattern ce does not match the text
86
* ce, we skip it until we find a match in the end composite character or when
87
* it has passed the character. This is so that we can match pattern "a" with
89
* @param strsrch string search data
90
* @param status error status if any
91
* @return TRUE if an exact match is found, FALSE otherwise
94
// Looks for the next exact match (no check for extra accents at the ends).
// strsrch: string search data; status: receives the error status, if any.
// Returns TRUE if an exact match is found, FALSE otherwise.
UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
98
* According to the definition, matches found here will include the whole span
99
* of beginning and ending accents if it overlaps that region.
100
* @param strsrch string search data
101
* @param status error status if any
102
* @return TRUE if a canonical match is found, FALSE otherwise
105
// Looks for the next canonical match; a match covers the whole span of
// beginning and ending accents when it overlaps that region.
// strsrch: string search data; status: receives the error status, if any.
// Returns TRUE if a canonical match is found, FALSE otherwise.
UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
108
* Gets the previous match.
109
* Comments follow from handleNextExact
110
* @param strsrch string search data
111
* @param status error status if any
114
// Gets the previous exact match; the notes on usearch_handleNextExact apply.
// strsrch: string search data; status: receives the error status, if any.
// NOTE(review): presumably returns TRUE when a match is found and FALSE
// otherwise, mirroring usearch_handleNextExact - confirm against the
// implementation.
UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
118
* According to the definition, matches found here will include the whole span
119
* of beginning and ending accents if it overlaps that region.
120
* @param strsrch string search data
121
* @param status error status if any
122
* @return TRUE if a canonical match is found, FALSE otherwise
125
UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,