2
// file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
5
***************************************************************************
6
* Copyright (C) 2002-2005 International Business Machines Corporation *
7
* and others. All rights reserved. *
8
***************************************************************************
11
#include "unicode/utypes.h"
13
#if !UCONFIG_NO_BREAK_ITERATION
15
#include "unicode/unistr.h"
16
#include "unicode/uniset.h"
17
#include "unicode/uchar.h"
18
#include "unicode/parsepos.h"
27
// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
28
// when the hash table is deleted.
31
// Callback registered on the symbol table's hash table with uhash_setValueDeleter().
// It receives a stored value as an untyped pointer.
static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
32
// Recover the concrete entry type from the opaque pointer handed in by the hash table.
// NOTE(review): the statement that actually disposes of px is not visible in this excerpt.
RBBISymbolTableEntry *px = (RBBISymbolTableEntry *)p;
41
// Constructor. Records the rule text and the owning rule scanner, and opens the
// hash table that holds the variable definitions.
//   rs      the rule scanner, stored in fRuleScanner.
//   rules   the rule source text, stored in fRules.
//   status  in/out error code; passed to uhash_open() and tested afterwards.
RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
42
// ffffString is built from the single code unit 0xffff; lookup() returns its
// address as the stand-in text for a variable that refers to just one set.
:fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
45
// No set is pending for lookupMatcher() yet.
fCachedSetLookup = NULL;
47
// The table is opened with the UnicodeString hash and compare functions.
fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, &status);
48
// uhash_open checks status
49
// NOTE(review): the body of this failure branch is not visible in this excerpt.
if (U_FAILURE(status)) {
52
// Stored entries are disposed of through RBBISymbolTableEntry_deleter.
uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
57
// Destructor. Closes the hash table opened by the constructor. The constructor
// registered RBBISymbolTableEntry_deleter as the value deleter for that table.
RBBISymbolTable::~RBBISymbolTable()
59
uhash_close(fHashTable);
64
// RBBISymbolTable::lookup This function from the abstract symbol table interface
65
// looks up a variable name and returns a UnicodeString
66
// containing the substitution text.
68
// The variable name does NOT include the leading $.
70
// Side effect: although declared const, this function rewrites fCachedSetLookup
// (through a const-cast of 'this') so that lookupMatcher() can later return
// the set belonging to the variable that was just looked up.
const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const
72
RBBISymbolTableEntry *el;
76
const UnicodeString *retString;
77
RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
79
// Fetch the table entry stored under the variable name.
// NOTE(review): the handling of a missing entry, and the assignment of
// varRefNode, are on lines not visible in this excerpt.
el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
85
exprNode = varRefNode->fLeftChild; // Root node of expression for variable
86
if (exprNode->fType == RBBINode::setRef) {
87
// The $variable refers to a single UnicodeSet
88
// return the ffffString, which will subsequently be interpreted as a
89
// stand-in character for the set by RBBISymbolTable::lookupMatcher()
90
usetNode = exprNode->fLeftChild;
91
// Remember the set so the following lookupMatcher() call can hand it back.
This->fCachedSetLookup = usetNode->fInputSet;
92
retString = &ffffString;
96
// The variable refers to something other than just a set.
97
// return the original source string for the expression
98
retString = &exprNode->fText;
99
// Nothing for lookupMatcher() to return in this case.
This->fCachedSetLookup = NULL;
107
// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
108
// interface maps a single stand-in character to a
109
// pointer to a Unicode Set. The Unicode Set code uses this
110
// mechanism to get all references to the same $variable
111
// name to refer to a single common Unicode Set instance.
113
// This implementation cheats a little, and does not maintain a map of stand-in chars
114
// to sets. Instead, it takes advantage of the fact that the UnicodeSet
115
// constructor will always call this function right after calling lookup(),
116
// and we just need to remember what set to return between these two calls.
117
// Returns the set remembered by the preceding lookup() call, if any.
// NOTE(review): any test of 'ch' and the return statement are on lines
// not visible in this excerpt.
const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
119
UnicodeSet *retVal = NULL;
120
RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
122
// Take the set cached by lookup() ...
retVal = fCachedSetLookup;
123
// ... and clear the cache, so the same set is not handed out a second time.
This->fCachedSetLookup = 0;
129
// RBBISymbolTable::parseReference This function from the abstract symbol table interface
130
// looks for a $variable name in the source text.
131
// It does not look it up, only scans for it.
132
// It is used by the UnicodeSet parser.
134
// This implementation is lifted pretty much verbatim
135
// from the rules based transliterator implementation.
136
// I didn't see an obvious way of sharing it.
138
// Scans 'text' for an identifier beginning at the current index of 'pos'.
// Returns the identifier, or an empty string if no valid name characters
// are found at that position.
// NOTE(review): the loop header, the declaration of 'i', the use of 'limit'
// and any update of 'pos' are on lines not visible in this excerpt.
UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text,
139
ParsePosition& pos, int32_t limit) const
141
// The candidate name starts at the current parse position.
int32_t start = pos.getIndex();
143
UnicodeString result;
145
UChar c = text.charAt(i);
146
// Stop at the first character that cannot be part of the name: the first
// character must pass u_isIDStart(), every character must pass u_isIDPart().
if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
151
if (i == start) { // No valid name chars
152
return result; // Indicate failure with empty string
155
// Copy the characters in [start, i) into the result.
text.extractBetween(start, i, result);
162
// RBBISymbolTable::lookupNode Given a key (a variable name), return the
163
// corresponding RBBI Node. If there is no entry
164
// in the table for this name, return NULL.
166
// Looks up the table entry stored under 'key'.
// NOTE(review): the code that derives retNode from the entry and returns it
// is on lines not visible in this excerpt.
RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
168
// Result defaults to NULL.
RBBINode *retNode = NULL;
169
RBBISymbolTableEntry *el;
171
el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
180
// RBBISymbolTable::addEntry Add a new entry to the symbol table.
181
// Indicate an error if the name already exists -
182
// this will only occur in the case of duplicate
183
// variable assignments.
185
//   key   the variable name to define.
//   val   the node to associate with the name.
//   err   in/out error code. Set to U_BRK_VARIABLE_REDFINITION or
//         U_MEMORY_ALLOCATION_ERROR on the failure paths below; also passed
//         on to uhash_put().
// NOTE(review): the conditions guarding the error assignments, and the code
// that fills in the new entry's key and val, are not visible in this excerpt.
void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
186
RBBISymbolTableEntry *e;
187
/* check whether an earlier operation has already reported a failure */
188
if (U_FAILURE(err)) {
191
// Probe the table for an existing entry with this name.
e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
193
err = U_BRK_VARIABLE_REDFINITION;
197
e = new RBBISymbolTableEntry;
199
err = U_MEMORY_ALLOCATION_ERROR;
204
// The hash key is a pointer to the key string held inside the entry itself,
// so the key and the value share one allocation.
uhash_put( fHashTable, &e->key, e, &err);
208
// Default constructor: the key is default-constructed and no node is
// attached yet (val starts out as NULL).
RBBISymbolTableEntry::RBBISymbolTableEntry()
    : UMemory(),
      key(),
      val(NULL)
{
}
210
// Destructor. Deletes the expression subtree hanging off the entry's node.
// NOTE(review): val is dereferenced here without a visible null check, while
// the default constructor initializes val to NULL; confirm that every entry
// has val set before it is destroyed.
RBBISymbolTableEntry::~RBBISymbolTableEntry() {
211
// The "val" of a symbol table entry is a variable reference node.
212
// The l. child of the val is the rhs expression from the assignment.
213
// Unlike other node types, children of variable reference nodes are not
214
// automatically recursively deleted. We do it manually here.
215
delete val->fLeftChild;
216
// Clear the pointer so the deleted child is no longer reachable through val.
val->fLeftChild = NULL;
220
// Note: the key UnicodeString is destructed by virtue of being in the object by value.
225
// RBBISymbolTable::rbbiSymtablePrint Debugging function, dump out the symbol table contents.
228
// Prints the table in two sections: first a listing of each variable with its
// node pointer and expression text, then each variable with its parse tree.
// NOTE(review): the loop constructs, the declaration and (re)initialization of
// 'pos', and the end-of-table tests are on lines not visible in this excerpt.
void RBBISymbolTable::rbbiSymtablePrint() const {
229
RBBIDebugPrintf("Variable Definitions\n"
230
"Name Node Val String Val\n"
231
"----------------------------------------------------------------------\n");
234
const UHashElement *e = NULL;
236
// Section 1: step through the hash table elements.
e = uhash_nextElement(fHashTable, &pos);
240
RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
242
// Variable name (the 15 is presumably a minimum field width; verify against the helper).
RBBI_DEBUG_printUnicodeString(s->key, 15);
243
// Address of the entry's node.
RBBIDebugPrintf(" %8p ", (void *)s->val);
244
// Text of the expression under the node's left child.
RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
245
RBBIDebugPrintf("\n");
248
RBBIDebugPrintf("\nParsed Variable Definitions\n");
251
// Section 2: step through the hash table elements again.
e = uhash_nextElement(fHashTable, &pos);
255
RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
256
RBBI_DEBUG_printUnicodeString(s->key);
257
// Dump the expression subtree under the node's left child.
s->val->fLeftChild->printTree(TRUE);
258
RBBIDebugPrintf("\n");
269
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */