2
**********************************************************************
3
* Copyright (C) 1999-2007, International Business Machines Corporation
4
* and others. All Rights Reserved.
5
**********************************************************************
6
* Date Name Description
7
* 11/17/99 aliu Creation.
8
**********************************************************************
13
#include "unicode/utypes.h"
15
#if !UCONFIG_NO_TRANSLITERATION
17
#include "unicode/uobject.h"
18
#include "unicode/utrans.h"
24
class TransliterationRule;
25
class TransliterationRuleData;
31
* A set of rules for a <code>RuleBasedTransliterator</code>.
34
class TransliterationRuleSet : public UMemory {
36
* Vector of rules, in the order added. This is used while the
37
* rule set is getting built. After that, freeze() reorders and
38
* indexes the rules into rules[]. Any given rule is stored once
39
* in ruleVector, and one or more times in rules[]. ruleVector
40
* owns and deletes the rules.
45
* Sorted and indexed table of rules. This is created by freeze()
46
* from the rules in ruleVector. It contains alias pointers to
47
* the rules in ruleVector. It is zero before freeze() is called
48
* and non-zero thereafter.
50
TransliterationRule** rules;
53
* Index table. For text having a first character c, compute x = c&0xFF.
54
* Now use rules[index[x]..index[x+1]-1]. This index table is created by
55
* freeze(). Before freeze() is called it contains garbage.
60
* Length of the longest preceding context
62
int32_t maxContextLength;
67
* Construct a new empty rule set.
68
* @param status Output parameter filled in with success or failure status.
70
TransliterationRuleSet(UErrorCode& status);
75
TransliterationRuleSet(const TransliterationRuleSet&);
80
virtual ~TransliterationRuleSet();
83
* Change the data object that this rule belongs to. Used
84
* internally by the TransliterationRuleData copy constructor.
85
* @param data the new data value to be set.
87
void setData(const TransliterationRuleData* data);
90
* Return the maximum context length.
91
* @return the length of the longest preceding context.
93
virtual int32_t getMaximumContextLength(void) const;
96
* Add a rule to this set. Rules are added in order, and order is
97
* significant. The last call to this method must be followed by
98
* a call to <code>freeze()</code> before the rule set is used.
99
* This method must <em>not</em> be called after freeze() has been
102
* @param adoptedRule the rule to add
104
virtual void addRule(TransliterationRule* adoptedRule,
108
* Check this for masked rules and index it to optimize performance.
109
* The sequence of operations is: (1) add rules to a set using
110
* <code>addRule()</code>; (2) freeze the set using
111
* <code>freeze()</code>; (3) use the rule set. If
112
* <code>addRule()</code> is called after calling this method, it
113
* invalidates this object, and this method must be called again.
114
* That is, <code>freeze()</code> may be called multiple times,
115
* although for optimal performance it shouldn't be.
116
* @param parseError A reference to a UParseError to receive information about errors
118
* @param status Output parameter filled in with success or failure status.
120
virtual void freeze(UParseError& parseError, UErrorCode& status);
123
* Transliterate the given text with the given UTransPosition
124
* indices. Return TRUE if the transliteration should continue
125
* or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
126
* Note that FALSE is only ever returned if isIncremental is TRUE.
127
* @param text the text to be transliterated
128
* @param index the position indices, which will be updated
129
* @param isIncremental if TRUE, assume new text may be inserted
130
* at index.limit, and return FALSE if there is a partial match.
131
* @return TRUE unless a U_PARTIAL_MATCH has been obtained,
132
* indicating that transliteration should stop until more text
135
UBool transliterate(Replaceable& text,
136
UTransPosition& index,
137
UBool isIncremental);
140
* Create rule strings that represent this rule set.
141
* @param result string to receive the rule strings. Current
142
* contents will be deleted.
143
* @param escapeUnprintable if TRUE, escape the unprintable characters
144
* @return A reference to 'result'.
146
virtual UnicodeString& toRules(UnicodeString& result,
147
UBool escapeUnprintable) const;
150
* Return the set of all characters that may be modified
151
* (getTarget=false) or emitted (getTarget=true) by this set.
153
UnicodeSet& getSourceTargetSet(UnicodeSet& result,
154
UBool getTarget) const;
158
TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class
163
#endif /* #if !UCONFIG_NO_TRANSLITERATION */