2
******************************************************************************
4
* Copyright (C) 2008-2009, International Business Machines
5
* Corporation and others. All Rights Reserved.
7
******************************************************************************
8
* file name: uspoof_conf.h
10
* tab size: 8 (not used)
13
* created on: 2009Jan05
14
* created by: Andy Heninger
16
* Internal classes for compiling confusable data into its binary (runtime) form.
19
#ifndef __USPOOF_BUILDCONF_H__
20
#define __USPOOF_BUILDCONF_H__
22
#if !UCONFIG_NO_NORMALIZATION
24
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
26
#include "uspoof_impl.h"
31
// Holds a string that is the result of one of the mappings defined
32
// by the confusable mapping data (confusables.txt from Unicode.org)
33
// Instances of SPUString exist during the compilation process only.
35
// Build-time record for one mapping-result string: the string itself plus the
// position it is assigned in the final runtime data.
// NOTE(review): the bare numeric lines interleaved with the members, and the
// missing closing brace in this view, look like extraction residue - confirm
// against the upstream header before building.
struct SPUString : public UMemory {
36
UnicodeString *fStr; // The actual string.
37
int32_t fStrTableIndex; // Index into the final runtime data for this string
38
// (or, for a string of length 1, the single string char itself,
39
// there being no string table entry for it).
40
// Construct from the string s.
// NOTE(review): presumably s is stored in fStr; ownership is not visible
// from this header - confirm in the implementation file.
SPUString(UnicodeString *s);
45
// String Pool: a utility class for holding the strings that are the result of
46
// the spoof mappings. These strings will ultimately end up in the
47
// run-time String Table.
48
// This is sort of like a sorted set of strings, except that ICU's anemic
49
// built-in collections don't support those, so it is implemented with a
50
// combination of a uhash and a UVector.
53
// Collection of the SPUString mapping results gathered during a build.
// Storage is a UVector of SPUString* (fVec) plus a UHashtable keyed by
// UnicodeString (fHash).
// NOTE(review): access specifiers, any destructor, and the closing brace are
// not visible in this view; the bare numeric lines look like extraction
// residue - confirm against the upstream header.
class SPUStringPool : public UMemory {
55
// Create an empty pool. Failures are reported through status.
SPUStringPool(UErrorCode &status);
58
// Add a string. Return the string from the table.
59
// If the input parameter string is already in the table, delete the
60
// input parameter and return the existing string.
// Callers must therefore not use src after this call returns.
61
SPUString *addString(UnicodeString *src, UErrorCode &status);
64
// Get the n-th string in the collection.
// NOTE(review): behavior for an out-of-range i is not visible here.
65
SPUString *getByIndex(int32_t i);
67
// Sort the contents; affects the ordering of getByIndex().
// NOTE(review): the sort key is not visible from this header.
68
void sort(UErrorCode &status);
73
UVector *fVec; // Elements are SPUString *
74
UHashtable *fHash; // Key: UnicodeString Value: SPUString
78
// class ConfusabledataBuilder
79
// An instance of this class exists while the confusable data is being built from source.
80
// It encapsulates the intermediate data structures that are used for building.
81
// It exports one static function, to do a confusable data build.
83
// Holds the intermediate state used while compiling confusable data from its
// source text into the binary runtime form.
// NOTE(review): access specifiers, some members, the end of the addKeyEntry
// parameter list, and the closing brace are not visible in this view; the
// bare numeric lines look like extraction residue - confirm against the
// upstream header before building.
class ConfusabledataBuilder : public UMemory {
85
SpoofImpl *fSpoofImpl; // NOTE(review): presumably the spImpl given to the constructor - confirm.
91
UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
93
// The binary data is first assembled into the following four collections, then
94
// copied to its final raw-memory destination.
// NOTE(review): only two of the four collections appear in this view.
97
UnicodeString *fStringTable;
98
UVector *fStringLengthsTable;
100
SPUStringPool *stringPool; // Pool of mapping-result strings being collected.
101
URegularExpression *fParseLine; // NOTE(review): presumably matches one line of the confusables source - confirm.
102
URegularExpression *fParseHexNum; // NOTE(review): presumably extracts a hex number from a field - confirm.
105
// Create a builder working on behalf of spImpl. Failures are reported through status.
ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
106
~ConfusabledataBuilder();
107
// Run the build over the confusables source text.
// NOTE(review): confusablesLen is presumably the length of confusables;
// units and encoding are not visible here.
void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
109
// Add an entry to the key and value tables being built
110
// input: data from SLTable, MATable, etc.
111
// output: entry added to fKeyVec and fValueVec
112
void addKeyEntry(UChar32 keyChar, // The key character
113
UHashtable *table, // The table, one of SATable, MATable, etc.
114
int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
117
// From an index into fKeyVec & fValueVec
118
// get a UnicodeString with the corresponding mapping.
119
UnicodeString getMapping(int32_t key);
121
// Populate the final binary output data array with the compiled data.
122
void outputData(UErrorCode &status);
125
// The one static entry point: perform a confusable data build for spImpl
// from the given source text.
// NOTE(review): the meaning of errorType and pe is not visible from this
// header; presumably they receive details of a parse failure - confirm.
static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
126
int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
131
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
132
#endif // __USPOOF_BUILDCONF_H__