2
**********************************************************************
3
* Copyright (c) 2001-2008, International Business Machines
4
* Corporation and others. All Rights Reserved.
5
**********************************************************************
6
* Date Name Description
7
* 08/10/2001 aliu Creation.
8
**********************************************************************
13
#include "unicode/utypes.h"
15
#if !UCONFIG_NO_TRANSLITERATION
17
#include "unicode/uobject.h"
18
#include "unicode/translit.h"
24
class TransliteratorEntry;
25
class TransliteratorSpec;
28
//------------------------------------------------------------------
29
// TransliteratorAlias
30
//------------------------------------------------------------------
33
* A TransliteratorAlias object is returned by get() if the given ID
34
* actually translates into something else. The caller then invokes
35
* the create() method on the alias to create the actual
36
* transliterator, and deletes the alias.
38
* Why all the shenanigans? To prevent circular calls between
39
* the registry code and the transliterator code that would deadlock.
41
class TransliteratorAlias : public UMemory {
44
* Construct a simple alias (type == SIMPLE)
45
* @param aliasID the given id.
47
TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
50
* Construct a compound RBT alias (type == COMPOUND)
52
TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
53
UVector* adoptedTransliterators,
54
const UnicodeSet* compoundFilter);
57
* Construct a rules alias (type == RULES)
59
TransliteratorAlias(const UnicodeString& theID,
60
const UnicodeString& rules,
63
~TransliteratorAlias();
66
* The whole point of create() is that the caller must invoke
67
* it when the registry mutex is NOT held, to prevent deadlock.
68
* It may only be called once.
70
* Note: Only call create() if isRuleBased() returns FALSE.
72
* This method must be called *outside* of the TransliteratorRegistry
75
Transliterator* create(UParseError&, UErrorCode&);
78
* Return TRUE if this alias is rule-based. If so, the caller
79
* must call parse() on it, then call TransliteratorRegistry::reget().
81
UBool isRuleBased() const;
84
* If isRuleBased() returns TRUE, then the caller must call this
85
* method, followed by TransliteratorRegistry::reget(). The latter
86
* method must be called inside the TransliteratorRegistry mutex.
88
* Note: Only call parse() if isRuleBased() returns TRUE.
90
* This method must be called *outside* of the TransliteratorRegistry
91
* mutex, because it can instantiate Transliterators embedded in
92
* the rules via the "&Latin-Arabic()" syntax.
94
void parse(TransliteratorParser& parser,
95
UParseError& pe, UErrorCode& ec) const;
98
// We actually come in three flavors:
100
// Here aliasID is the alias string. Everything else is
101
// null, zero, empty.
103
// Here ID is the ID, aliasID is the idBlock, trans is the
104
// contained RBT, and idSplitPoint is the offset in aliasID
105
// where the contained RBT goes. compoundFilter is the
106
// compound filter, and it is _not_ owned.
108
// Here ID is the ID, aliasID is the rules string.
109
// idSplitPoint is the UTransDirection.
111
UnicodeString aliasesOrRules;
112
UVector* transes; // owned
113
const UnicodeSet* compoundFilter; // alias
114
UTransDirection direction;
115
enum { SIMPLE, COMPOUND, RULES } type;
117
TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
118
TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
123
* A registry of system transliterators. This is the data structure
124
* that implements the mapping between transliterator IDs and the data
125
* or function pointers used to create the corresponding
126
* transliterators. There is one instance of the registry that is
127
* created statically.
129
* The registry consists of a dynamic component -- a hashtable -- and
130
* a static component -- locale resource bundles. The dynamic store
131
* is semantically overlaid on the static store, so the static mapping
132
* can be dynamically overridden.
134
* This is an internal class that is only used by Transliterator.
135
* Transliterator maintains one static instance of this class and
136
* delegates all registry-related operations to it.
140
class TransliteratorRegistry : public UMemory {
146
* @param status Output param set to success/failure code.
148
TransliteratorRegistry(UErrorCode& status);
151
* Nonvirtual destructor -- this class is not subclassable.
153
~TransliteratorRegistry();
155
//------------------------------------------------------------------
157
//------------------------------------------------------------------
160
* Given a simple ID (forward direction, no inline filter, not
161
* compound) attempt to instantiate it from the registry. Return
164
* Return a non-NULL aliasReturn value if the ID points to an alias.
165
* We cannot instantiate it ourselves because the alias may contain
166
* filters or compounds, which we do not understand. Caller should
167
* make aliasReturn NULL before calling.
168
* @param ID the given ID
169
* @param aliasReturn output param to receive TransliteratorAlias;
170
* should be NULL on entry
171
* @param parseError Struct to receive information on position
172
* of error if an error is encountered
173
* @param status Output param set to success/failure code.
175
Transliterator* get(const UnicodeString& ID,
176
TransliteratorAlias*& aliasReturn,
180
* The caller must call this after calling get(), if [a] calling get()
181
* returns an alias, and [b] the alias is rule based. In that
182
* situation the caller must call alias->parse() to do the parsing
183
* OUTSIDE THE REGISTRY MUTEX, then call this method to retry
184
* instantiating the transliterator.
186
* Note: Another alias might be returned by this method.
188
* This method (like all public methods of this class) must be called
189
* from within the TransliteratorRegistry mutex.
191
* @param aliasReturn output param to receive TransliteratorAlias;
192
* should be NULL on entry
194
Transliterator* reget(const UnicodeString& ID,
195
TransliteratorParser& parser,
196
TransliteratorAlias*& aliasReturn,
200
* Register a prototype (adopted). This adds an entry to the
201
* dynamic store, or replaces an existing entry. Any entry in the
202
* underlying static locale resource store is masked.
204
void put(Transliterator* adoptedProto,
209
* Register an ID and a factory function pointer. This adds an
210
* entry to the dynamic store, or replaces an existing entry. Any
211
* entry in the underlying static locale resource store is masked.
213
void put(const UnicodeString& ID,
214
Transliterator::Factory factory,
215
Transliterator::Token context,
220
* Register an ID and a resource name. This adds an entry to the
221
* dynamic store, or replaces an existing entry. Any entry in the
222
* underlying static locale resource store is masked.
224
void put(const UnicodeString& ID,
225
const UnicodeString& resourceName,
227
UBool readonlyResourceAlias,
232
* Register an ID and an alias ID. This adds an entry to the
233
* dynamic store, or replaces an existing entry. Any entry in the
234
* underlying static locale resource store is masked.
236
void put(const UnicodeString& ID,
237
const UnicodeString& alias,
238
UBool readonlyAliasAlias,
243
* Unregister an ID. This removes an entry from the dynamic store
244
* if there is one. The static locale resource store is
246
* @param ID the given ID.
248
void remove(const UnicodeString& ID);
250
//------------------------------------------------------------------
251
// Public ID and spec management
252
//------------------------------------------------------------------
255
* Return a StringEnumeration over the IDs currently registered
259
StringEnumeration* getAvailableIDs() const;
262
* == OBSOLETE - remove in ICU 3.4 ==
263
* Return the number of IDs currently registered with the system.
264
* To retrieve the actual IDs, call getAvailableID(i) with
265
* i from 0 to countAvailableIDs() - 1.
266
* @return the number of IDs currently registered with the system.
269
int32_t countAvailableIDs(void) const;
272
* == OBSOLETE - remove in ICU 3.4 ==
273
* Return the index-th available ID. index must be between 0
274
* and countAvailableIDs() - 1, inclusive. If index is out of
275
* range, the result of getAvailableID(0) is returned.
276
* @param index the given index.
277
* @return the index-th available ID. index must be between 0
278
* and countAvailableIDs() - 1, inclusive. If index is out of
279
* range, the result of getAvailableID(0) is returned.
282
const UnicodeString& getAvailableID(int32_t index) const;
285
* Return the number of registered source specifiers.
286
* @return the number of registered source specifiers.
288
int32_t countAvailableSources(void) const;
291
* Return a registered source specifier.
292
* @param index which specifier to return, from 0 to n-1, where
293
* n = countAvailableSources()
294
* @param result fill-in parameter to receive the source specifier.
295
* If index is out of range, result will be empty.
296
* @return reference to result
298
UnicodeString& getAvailableSource(int32_t index,
299
UnicodeString& result) const;
302
* Return the number of registered target specifiers for a given
304
* @param source the given source specifier.
305
* @return the number of registered target specifiers for a given
308
int32_t countAvailableTargets(const UnicodeString& source) const;
311
* Return a registered target specifier for a given source.
312
* @param index which specifier to return, from 0 to n-1, where
313
* n = countAvailableTargets(source)
314
* @param source the source specifier
315
* @param result fill-in parameter to receive the target specifier.
316
* If source is invalid or if index is out of range, result will
318
* @return reference to result
320
UnicodeString& getAvailableTarget(int32_t index,
321
const UnicodeString& source,
322
UnicodeString& result) const;
325
* Return the number of registered variant specifiers for a given
326
* source-target pair. There is always at least one variant: If
327
* just source-target is registered, then the single variant
328
* NO_VARIANT is returned. If source-target/variant is registered
329
* then that variant is returned.
330
* @param source the source specifier
331
* @param target the target specifier
332
* @return the number of registered variant specifiers for a given
333
* source-target pair.
335
int32_t countAvailableVariants(const UnicodeString& source,
336
const UnicodeString& target) const;
339
* Return a registered variant specifier for a given source-target
340
* pair. If NO_VARIANT is one of the variants, then it will be
342
* @param index which specifier to return, from 0 to n-1, where
343
* n = countAvailableVariants(source, target)
344
* @param source the source specifier
345
* @param target the target specifier
346
* @param result fill-in parameter to receive the variant
347
* specifier. If source is invalid or if target is invalid or if
348
* index is out of range, result will be empty.
349
* @return reference to result
351
UnicodeString& getAvailableVariant(int32_t index,
352
const UnicodeString& source,
353
const UnicodeString& target,
354
UnicodeString& result) const;
358
//----------------------------------------------------------------
359
// Private implementation
360
//----------------------------------------------------------------
362
TransliteratorEntry* find(const UnicodeString& ID);
364
TransliteratorEntry* find(UnicodeString& source,
365
UnicodeString& target,
366
UnicodeString& variant);
368
TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
369
const TransliteratorSpec& trg,
370
const UnicodeString& variant) const;
372
TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
373
const TransliteratorSpec& trg,
374
const UnicodeString& variant);
376
static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
377
const TransliteratorSpec& specToFind,
378
const UnicodeString& variant,
379
UTransDirection direction);
381
void registerEntry(const UnicodeString& source,
382
const UnicodeString& target,
383
const UnicodeString& variant,
384
TransliteratorEntry* adopted,
387
void registerEntry(const UnicodeString& ID,
388
TransliteratorEntry* adopted,
391
void registerEntry(const UnicodeString& ID,
392
const UnicodeString& source,
393
const UnicodeString& target,
394
const UnicodeString& variant,
395
TransliteratorEntry* adopted,
398
void registerSTV(const UnicodeString& source,
399
const UnicodeString& target,
400
const UnicodeString& variant);
402
void removeSTV(const UnicodeString& source,
403
const UnicodeString& target,
404
const UnicodeString& variant);
406
Transliterator* instantiateEntry(const UnicodeString& ID,
407
TransliteratorEntry *entry,
408
TransliteratorAlias*& aliasReturn,
412
* A StringEnumeration over the registered IDs in this object.
414
class Enumeration : public StringEnumeration {
416
Enumeration(const TransliteratorRegistry& reg);
417
virtual ~Enumeration();
418
virtual int32_t count(UErrorCode& status) const;
419
virtual const UnicodeString* snext(UErrorCode& status);
420
virtual void reset(UErrorCode& status);
421
static UClassID U_EXPORT2 getStaticClassID();
422
virtual UClassID getDynamicClassID() const;
425
const TransliteratorRegistry& reg;
427
friend class Enumeration;
432
* Dynamic registry mapping full IDs to Entry objects. This
433
* contains both public and internal entities. The visibility is
434
* controlled by whether an entry is listed in availableIDs and
440
* DAG of visible IDs by spec. Hashtable: source => (Hashtable:
441
* target => (UVector: variant)) The UVector of variants is never
442
* empty. For a source-target with no variant, the special
443
* variant NO_VARIANT (the empty string) is stored in slot zero of
449
* Vector of public full IDs.
451
UVector availableIDs;
453
TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
454
TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
459
#endif /* #if !UCONFIG_NO_TRANSLITERATION */