2
* Copyright (C) 2001-2004, International Business Machines Corporation
3
* and others. All Rights Reserved.
4
**********************************************************************
5
* Date Name Description
6
* 07/23/01 aliu Creation.
7
**********************************************************************
12
#include "unicode/utypes.h"
14
#if !UCONFIG_NO_TRANSLITERATION
16
#include "unicode/unistr.h"
17
#include "unicode/unifunct.h"
18
#include "unicode/unimatch.h"
19
#include "unicode/unirepl.h"
23
// Forward declaration: the declarations visible in this header only use
// TransliterationRuleData through references and pointers, so the full
// class definition is not required here.
class TransliterationRuleData;
26
* An object that matches a fixed input string, implementing the
27
* UnicodeMatcher API. This object also implements the
28
* UnicodeReplacer API, allowing it to emit the matched text as
29
* output. Since the match text may contain flexible match elements,
30
* such as UnicodeSets, the emitted text is not the match pattern, but
31
* instead a substring of the actual matched text. Following
32
* convention, the output text is the leftmost match seen up to this
35
* A StringMatcher may represent a segment, in which case it has a
36
* positive segment number. This affects how the matcher converts
37
* itself to a pattern but does not otherwise affect its function.
39
* A StringMatcher that is not a segment should not be used as a
42
class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public UnicodeReplacer {
47
* Construct a matcher that matches the given pattern string.
48
* @param string the pattern to be matched, possibly containing
49
* stand-ins that represent nested UnicodeMatcher objects.
50
* @param start inclusive start index of text to be replaced
51
* @param limit exclusive end index of text to be replaced;
52
* must be greater than or equal to start
53
* @param segmentNum the segment number from 1..n, or 0 if this is
55
* @param data context object mapping stand-ins to
56
* UnicodeMatcher objects.
58
StringMatcher(const UnicodeString& string,
62
const TransliterationRuleData& data);
66
* @param o the object to be copied.
68
// Copy constructor: constructs a new StringMatcher from the existing matcher 'o'.
StringMatcher(const StringMatcher& o);
73
// Destructor. Declared virtual so that destroying a StringMatcher through a
// pointer to one of its base classes runs this class's destructor.
virtual ~StringMatcher();
76
* Implement UnicodeFunctor
77
* @return a copy of the object.
79
// Polymorphic copy: returns a copy of this object typed as UnicodeFunctor*.
// NOTE(review): presumably the caller takes ownership of the returned
// object -- confirm against the implementation.
virtual UnicodeFunctor* clone() const;
82
* UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
83
* and return the pointer.
84
* @return the UnicodeMatcher pointer.
86
// Returns this object viewed through its UnicodeMatcher interface.
// The method is const but hands back a non-const pointer.
// NOTE(review): the implementation presumably casts away constness of
// 'this' -- confirm.
virtual UnicodeMatcher* toMatcher() const;
89
* UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer
90
* and return the pointer.
91
* @return the UnicodeReplacer pointer.
93
// Returns this object viewed through its UnicodeReplacer interface.
// The method is const but hands back a non-const pointer.
// NOTE(review): the implementation presumably casts away constness of
// 'this' -- confirm.
virtual UnicodeReplacer* toReplacer() const;
96
* Implement UnicodeMatcher
97
* @param text the text to be matched
98
* @param offset on input, the index into text at which to begin
99
* matching. On output, the limit of the matched text. The
100
* number of matched characters is the output value of offset
101
* minus the input value. Offset should always point to the
102
* HIGH SURROGATE (leading code unit) of a pair of surrogates,
103
* both on entry and upon return.
104
* @param limit the limit index of text to be matched. Greater
105
* than offset for a forward direction match, less than offset for
106
* a backward direction match. The last character to be
107
* considered for matching will be text.charAt(limit-1) in the
108
* forward direction or text.charAt(limit+1) in the backward
110
* @param incremental if TRUE, then assume further characters may
111
* be inserted at limit and check for partial matching. Otherwise
112
* assume the text as given is complete.
113
* @return a match degree value indicating a full match, a partial
114
* match, or a mismatch. If incremental is FALSE then
115
* U_PARTIAL_MATCH should never be returned.
117
virtual UMatchDegree matches(const Replaceable& text,
123
* Implement UnicodeMatcher
124
* @param result Output param to receive the pattern.
125
* @param escapeUnprintable if TRUE then escape unprintable characters.
126
* @return A reference to 'result'.
128
// Produces the pattern for this matcher in 'result' and returns 'result'.
// escapeUnprintable defaults to FALSE, i.e. no escaping unless requested.
// NOTE(review): a default argument on a virtual function is selected by the
// static type at the call site -- keep it in agreement with the base-class
// declaration.
virtual UnicodeString& toPattern(UnicodeString& result,
129
UBool escapeUnprintable = FALSE) const;
132
* Implement UnicodeMatcher
133
* Returns TRUE if this matcher will match a character c, where c
134
* & 0xFF == v, at offset, in the forward direction (with limit >
135
* offset). This is used by <tt>RuleBasedTransliterator</tt> for
137
* @param v the given value
138
* @return TRUE if this matcher will match a character c,
139
* where c & 0xFF == v
141
// Index pre-filter: 'v' is the low byte (c & 0xFF) of a candidate
// character c; returns TRUE if this matcher can match some such c in the
// forward direction.
virtual UBool matchesIndexValue(uint8_t v) const;
144
* Implement UnicodeMatcher
146
// UnicodeMatcher API. 'toUnionTo' is an in/out set that is modified in place.
// NOTE(review): presumably unions into 'toUnionTo' the set of characters
// this matcher can match -- confirm against the implementation.
virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
149
* Implement UnicodeFunctor
151
// UnicodeFunctor API. Takes a pointer to const rule data (parameter unnamed
// in this declaration).
// NOTE(review): presumably replaces the 'data' context pointer held by this
// object -- confirm against the implementation.
virtual void setData(const TransliterationRuleData*);
154
* Replace characters in 'text' from 'start' to 'limit' with the
155
* output text of this object. Update the 'cursor' parameter to
156
* give the cursor position and return the length of the
159
* @param text the text to be matched
160
* @param start inclusive start index of text to be replaced
161
* @param limit exclusive end index of text to be replaced;
162
* must be greater than or equal to start
163
* @param cursor output parameter for the cursor position.
164
* Not all replacer objects will update this, but in a complete
165
* tree of replacer objects, representing the entire output side
166
* of a transliteration rule, at least one must update it.
167
* @return the number of 16-bit code units in the text replacing
168
* the characters at offsets start..(limit-1) in text
170
virtual int32_t replace(Replaceable& text,
176
* Returns a string representation of this replacer. If the
177
* result of calling this function is passed to the appropriate
178
* parser, typically TransliteratorParser, it will produce another
179
* replacer that is equal to this one.
180
* @param result the string to receive the pattern. Previous
181
* contents will be deleted.
182
* @param escapeUnprintable if TRUE then convert unprintable
183
* characters to their hex escape representations, \\uxxxx or
184
* \\Uxxxxxxxx. Unprintable characters are defined by
185
* Utility.isUnprintable().
186
* @return a reference to 'result'.
188
// Writes the replacer pattern for this object into 'result' and returns
// 'result'. Unlike toPattern(), escapeUnprintable has no default here and
// must always be passed explicitly.
virtual UnicodeString& toReplacerPattern(UnicodeString& result,
189
UBool escapeUnprintable) const;
192
* Remove any match data. This must be called before performing a
193
* set of matches with this segment.
198
* ICU "poor man's RTTI", returns a UClassID for the actual class.
202
// Virtual, so the UClassID returned is that of the object's actual class
// even when called through a base-class pointer.
virtual UClassID getDynamicClassID() const;
205
* ICU "poor man's RTTI", returns a UClassID for this class.
209
// Static: returns the UClassID for this class without needing an instance.
static UClassID U_EXPORT2 getStaticClassID();
212
* Union the set of all characters that may be output by this object
213
* into the given set.
214
* @param toUnionTo the set into which to union the output characters
216
// Replacer-side counterpart of addMatchSetTo(): unions into 'toUnionTo' the
// characters this object may output. 'toUnionTo' is modified in place.
virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;
221
* The text to be matched.
223
// Pattern text this matcher matches against; held by value.
// NOTE(review): presumably derived from the constructor's 'string'
// argument and may contain stand-ins for nested matchers -- confirm.
UnicodeString pattern;
226
* Context object that maps stand-ins to matcher and replacer
229
// Pointer to const: the rule data is not modified through this member.
// NOTE(review): ownership is not visible here; presumably non-owning --
// confirm against the destructor.
const TransliterationRuleData* data;
232
* The segment number, 1-based, or 0 if not a segment.
234
// Positive when this matcher represents a segment, 0 otherwise. Per the
// class description this affects how the matcher converts itself to a
// pattern, not how it matches.
int32_t segmentNumber;
237
* Start offset, in the match text, of the <em>rightmost</em>
243
* Limit offset, in the match text, of the <em>rightmost</em>
252
#endif /* #if !UCONFIG_NO_TRANSLITERATION */