2
*******************************************************************************
4
* Copyright (C) 2001, International Business Machines
5
* Corporation and others. All Rights Reserved.
7
*******************************************************************************
8
* file name: ucol_tok.h
10
* tab size: 8 (not used)
14
* created by: Vladimir Weinstein
16
* This module reads a tailoring rule string and produces a list of
17
* tokens that will be turned into collation elements
26
#include "unicode/parseerr.h"
28
/* Sentinel with all 32 bits set. */
/* NOTE(review): presumably marks a value that has not been assigned yet -- confirm against callers. */
#define UCOL_TOK_UNSET 0xFFFFFFFF
29
/* Distinctive 32-bit magic value. */
/* NOTE(review): presumably tags a reset in the rules rather than a real strength level -- confirm against callers. */
#define UCOL_TOK_RESET 0xDEADBEEF
31
/* Polarity value 0. */
/* NOTE(review): presumably stored in the unsigned `polarity` member for the >, >>, >>> relations -- confirm. */
#define UCOL_TOK_POLARITY_NEGATIVE 0
32
/* Polarity value 1. */
/* NOTE(review): presumably stored in the `polarity` member for the <, <<, <<<, ',' and ';' relations -- confirm. */
#define UCOL_TOK_POLARITY_POSITIVE 1
34
/* Single bit: bit 2 (0x04). */
/* NOTE(review): appears to be one of a group of parser flag bits (with UCOL_TOK_VARIABLE_TOP, */
/* UCOL_TOK_BEFORE and UCOL_TOK_SUCCESS); confirm how they are combined. */
#define UCOL_TOK_TOP 0x04
35
/* Single bit: bit 3 (0x08); does not overlap UCOL_TOK_TOP, UCOL_TOK_BEFORE or UCOL_TOK_SUCCESS. */
/* NOTE(review): exact meaning is not visible here -- confirm against the parser. */
#define UCOL_TOK_VARIABLE_TOP 0x08
36
/* Two-bit mask covering bits 0-1 (0x03), unlike the single-bit values defined next to it. */
/* NOTE(review): presumably the low two bits carry a small value rather than a yes/no flag -- confirm. */
#define UCOL_TOK_BEFORE 0x03
37
/* Single bit: bit 4 (0x10); does not overlap UCOL_TOK_BEFORE, UCOL_TOK_TOP or UCOL_TOK_VARIABLE_TOP. */
/* NOTE(review): presumably reports a successful parse step -- confirm against the parser. */
#define UCOL_TOK_SUCCESS 0x10
39
/* this is space for the extra strings that need to be unquoted */
40
/* during the parsing of the rules */
41
/* NOTE(review): the unit of this size (UChars vs. bytes) is not visible here -- confirm at the allocation site. */
#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 2048
42
/* Forward declaration: lets declarations in this header refer to UColToken by pointer */
/* (for example the fStrToken and lStrToken members). */
typedef struct UColToken UColToken;
54
uint32_t previousContCE;
55
int32_t pos[UCOL_STRENGTH_LIMIT];
56
uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT];
57
uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT];
58
uint32_t numStr[UCOL_CE_STRENGTH_LIMIT];
59
UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT];
60
UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT];
76
uint32_t polarity; /* 1 for <, <<, <<<, ',' and ';'. NOTE(review): previously documented as -1 for >, >>, >>>, but this member is unsigned and UCOL_TOK_POLARITY_NEGATIVE is 0 -- presumably 0 is stored for those; confirm. */
77
UColTokListHeader *listHeader;
84
* This is a token that has been parsed
85
* but not yet processed. Used to reduce
86
* the number of arguments in the parser
92
uint32_t extensionOffset;
93
uint32_t extensionLen;
94
uint32_t prefixOffset;
97
uint16_t indirectIndex;
102
UColParsedToken parsedToken;
106
UChar *sourceCurrent;
109
const InverseTableHeader *invUCA;
110
const UCollator *UCA;
111
UHashtable *tailored;
114
UColTokListHeader *lh;
119
const UChar *subName;
121
UColAttributeValue attrVal;
125
const UChar *optionName;
127
const ucolTokSuboption *subopts;
132
#define ucol_tok_isSpecialChar(ch) \
133
(((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
134
(((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
135
(((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
136
(((ch) <= 0x007E) && ((ch) >= 0x007D)) || \
141
uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
142
UParseError *parseError,
146
/*
 * Declaration of ucol_tok_initTokenList.
 *
 *   src         - token parser state handed to the function
 *   rules       - rule text as a UChar string
 *   rulesLength - length that accompanies `rules`
 *                 NOTE(review): presumably counted in UChars -- confirm
 *   UCA         - collator passed alongside the rules
 *                 NOTE(review): presumably the root (UCA) collator -- confirm
 *   status      - pointer to a UErrorCode
 *                 NOTE(review): presumably the usual in/out error code -- confirm
 */
void ucol_tok_initTokenList(UColTokenParser *src,
                            const UChar *rules,
                            const uint32_t rulesLength,
                            UCollator *UCA,
                            UErrorCode *status);
148
/* Takes the same parser state type as ucol_tok_initTokenList. */
/* NOTE(review): presumably releases what the parser `src` owns -- confirm in the implementation. */
U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src);
150
U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src,
152
UParseError *parseError,