4
// Copyright (C) 2002-2008, International Business Machines Corporation and others.
5
// All Rights Reserved.
7
// This file contains declarations for class RBBIRuleScanner
14
#include "unicode/utypes.h"
15
#include "unicode/uobject.h"
16
#include "unicode/rbbi.h"
17
#include "unicode/uniset.h"
18
#include "unicode/parseerr.h"
21
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
22
// looks up references to $variables within a set.
24
//#include "rbbitblb.h"
30
class RBBIRuleBuilder;
31
class RBBISymbolTable;
34
//--------------------------------------------------------------------------------
36
// class RBBIRuleScanner does the lowest level, character-at-a-time
37
// scanning of break iterator rules.
39
// The output of the scanner is parse trees for
40
// the rule expressions and a list of all Unicode Sets encountered.
43
//--------------------------------------------------------------------------------
45
class RBBIRuleScanner : public UMemory {
49
// kStackSize is the capacity of both fixed-size parser stacks in this
// class: the state stack (fStack) and the node stack (fNodeStack).
kStackSize = 100 // The size of the state stack for
50
}; // rules parsing. Corresponds roughly
51
// to the depth of parentheses nesting
52
// that is allowed in the rules.
59
// Construct a scanner for the rule builder rb.
// NOTE(review): rb is presumably retained in fRB ("the rule builder that we
// are part of"); ownership is not visible from this declaration - confirm.
RBBIRuleScanner(RBBIRuleBuilder *rb);
62
// Destructor; declared virtual.
virtual ~RBBIRuleScanner();
64
void nextChar(RBBIRuleChar &c); // Get the next char from the input stream.
65
// The character is delivered through the output parameter c.
// NOTE(review): an older remark here said "Return false if at end", but the
// function is declared void, so it has no return value; end of input is
// presumably reported through c - confirm against the implementation.
67
// NOTE(review): the meaning of the UBool result is not visible from this
// declaration; presumably it reports whether the push was accepted - confirm.
UBool push(const RBBIRuleChar &c); // Push (unget) one character.
68
// Only a single character may be pushed.
70
void parse(); // Parse the rules, generating two parse
71
// trees, one each for the forward and
73
// reverse rules, and a list of UnicodeSets encountered.
76
// Return a rules string without unnecessary characters.
79
// Static utility: callable without a scanner instance. Takes the rule text
// by const reference and returns its result by value.
static UnicodeString stripRules(const UnicodeString &rules);
82
// Helper declarations used by the parser. The bodies are not visible here, so
// the notes marked NOTE(review) are inferred from names and signatures only.
//
// NOTE(review): presumably carries out the parse action identified by a; the
// meaning of the UBool result is not visible here - confirm.
UBool doParseActions(int32_t a);
83
void error(UErrorCode e); // error reporting convenience function.
84
// NOTE(review): presumably adjusts the node stack according to operator
// precedence p - confirm.
void fixOpStack(RBBINode::OpPrecedence p);
86
// setToAdopt defaults to NULL, so callers may omit it.
// NOTE(review): the parameter name suggests that a non-NULL set is adopted
// (ownership taken) - confirm against the implementation.
void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
90
// NOTE(review): presumably a debugging aid that prints fNodeStack under the
// heading title - confirm.
void printNodeStack(const char *title);
92
// Returns a pointer to an RBBINode.
// NOTE(review): presumably creates a node of type t and pushes it onto
// fNodeStack; failure behavior is not visible here - confirm.
RBBINode *pushNewNode(RBBINode::NodeType t);
96
// --- Owning builder and input-position state ---
RBBIRuleBuilder *fRB; // The rule builder that we are part of.
98
int32_t fScanIndex; // Index of current character being processed
99
// in the rule input string.
100
int32_t fNextIndex; // Index of the next character, which
101
// is the first character not yet scanned.
102
UBool fQuoteMode; // Scan is in a 'quoted region'
103
// fLineNum / fCharNum give the current position in the rule source as a
// line number and a character position within that line.
int32_t fLineNum; // Line number in input file.
104
int32_t fCharNum; // Char position within the line.
105
UChar32 fLastChar; // Previous char, needed to count CR-LF
106
// as a single line, not two.
108
RBBIRuleChar fC; // Current char for parse state machine
110
UnicodeString fVarName; // $variableName, valid when we've just scanned one.
113
RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule
114
// parsing. Indexed as p[state][char-class].
116
uint16_t fStack[kStackSize]; // State stack, holds state pushes
117
int32_t fStackPtr; // and pops as specified in the state
// transition table.
// NOTE(review): fStackPtr is presumably the index of the current top of
// fStack - confirm.
120
RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created
121
// during the parse of a rule
122
int32_t fNodeStackPtr; // Position within fNodeStack.
// NOTE(review): presumably the index of the current top of fNodeStack - confirm.
125
// --- Flags describing the rule being scanned ---
UBool fReverseRule; // True if the rule currently being scanned
126
// is a reverse direction rule (if it
127
// starts with a '!')
129
UBool fLookAheadRule; // True if the rule includes a '/'
130
// somewhere within it.
132
RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of
133
// $variable symbols.
135
UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to
136
// the sets created while parsing rules.
137
// The key is the string used for creating
// the set.
140
UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during
141
// the scanning of RBBI rules. The
142
// indices for these are assigned by the
143
// perl script that builds the state tables.
// NOTE(review): the array size 10 is hard-coded here; presumably it must
// cover every index that script assigns - confirm.
146
// --- Rule counter and !!option bookkeeping ---
int32_t fRuleNum; // Counts each rule as it is scanned.
148
int32_t fOptionStart; // Input index of start of a !!option
149
// keyword, while being scanned.
151
// Pointers to four UnicodeSets, named after the character categories
// rule_char, white_space, name_char and name_start_char.
// NOTE(review): despite the "g" prefix, no `static` appears on these
// declarations, so if they sit inside the class body they are ordinary
// per-object members. What they point at and who owns it is not visible
// here - confirm.
UnicodeSet *gRuleSet_rule_char;
152
UnicodeSet *gRuleSet_white_space;
153
UnicodeSet *gRuleSet_name_char;
154
UnicodeSet *gRuleSet_name_start_char;
156
// Copy construction and copy assignment are declared only so that copying of
// a scanner can be forbidden.
// NOTE(review): presumably these are private and intentionally never defined
// (the pre-C++11 non-copyable idiom); the access specifier is not visible in
// this excerpt - confirm.
RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
157
RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class