16
16
friend class CPinyinTrieMaker;
18
18
struct TTransUnit {
23
23
struct TWordIdInfo {
24
24
#ifdef WORDS_BIGENDIAN
27
unsigned m_csLevel : 2;
28
unsigned m_id : WORD_ID_WIDTH;
27
unsigned m_csLevel : 2;
28
unsigned m_id : WORD_ID_WIDTH;
30
unsigned m_id : WORD_ID_WIDTH;
31
unsigned m_csLevel : 2;
30
unsigned m_id : WORD_ID_WIDTH;
31
unsigned m_csLevel : 2;
36
36
/** Default constructor: clears every byte of this object to zero. */
TWordIdInfo()
{
    memset(this, 0, sizeof(*this));
}
38
TWordIdInfo(unsigned id, unsigned len=0, unsigned seen=0, unsigned cost = 0, unsigned cslvl = 0)
38
TWordIdInfo(unsigned id,
39
43
: m_id(id), m_csLevel(cslvl), m_cost(cost), m_bSeen(seen) { }
41
45
/** Implicit conversion to unsigned int; yields the m_id field. */
operator unsigned int() const
{
    return m_id;
}
45
49
#ifdef WORDS_BIGENDIAN
47
unsigned m_bFullSyllableTransfer: 1;
48
unsigned m_csLevel : 2;
49
unsigned m_nTransfer : 12;
50
unsigned m_nWordId : 12;
51
unsigned m_bFullSyllableTransfer : 1;
52
unsigned m_csLevel : 2;
53
unsigned m_nTransfer : 12;
54
unsigned m_nWordId : 12;
52
unsigned m_nWordId : 12;
53
unsigned m_nTransfer : 12;
54
unsigned m_csLevel : 2;
55
unsigned m_bFullSyllableTransfer: 1;
56
unsigned m_nWordId : 12;
57
unsigned m_nTransfer : 12;
58
unsigned m_csLevel : 2;
59
unsigned m_bFullSyllableTransfer : 1;
60
size_for(unsigned int nTransfer, unsigned int nWordId)
61
{ return sizeof(TNode) + sizeof(TTransUnit)*nTransfer +
62
sizeof(TWordIdInfo)*nWordId; }
65
{ *((unsigned*)this) = 0; }
68
hasPinyinChild(void) const
69
{ return (m_nTransfer > 1);}
73
{ return (TTransUnit*)(this+1); }
77
{ return (TWordIdInfo*)(((char*)(this+1))+sizeof(TTransUnit)*m_nTransfer); }
80
transfer(unsigned s) const
82
unsigned int b = 0, e = m_nTransfer;
83
const TTransUnit* ptrans = getTrans();
86
if (ptrans[m].m_Syllable == s)
87
return ptrans[m].m_Offset;
88
if (ptrans[m].m_Syllable < s)
63
static unsigned int size_for(unsigned int nTransfer,
64
unsigned int nWordId) {
65
return sizeof(TNode) + sizeof(TTransUnit) * nTransfer +
66
sizeof(TWordIdInfo) * nWordId;
70
{ *((unsigned *) this) = 0; }
72
bool hasPinyinChild(void) const
73
{ return(m_nTransfer > 1); }
75
/**
 * @return the memory immediately following this node object,
 *         viewed as an array of TTransUnit.
 */
const TTransUnit*getTrans() const
{
    return reinterpret_cast<const TTransUnit *>(this + 1);
}
78
const TWordIdInfo*getWordIdPtr() const
79
{ return (TWordIdInfo *) (((char *) (this +
80
1)) + sizeof(TTransUnit) *
83
unsigned int transfer(unsigned s) const {
84
unsigned int b = 0, e = m_nTransfer;
85
const TTransUnit* ptrans = getTrans();
87
int m = b + (e - b) / 2;
88
if (ptrans[m].m_Syllable == s)
89
return ptrans[m].m_Offset;
90
if (ptrans[m].m_Syllable < s)
98
100
/** Constructs an empty trie: size is zero and both data pointers are NULL. */
CPinyinTrie()
    : m_Size(0),
      m_mem(NULL),
      m_words(NULL)
{
}
104
106
load(const char* fileName);
110
isValid(const TNode* pnode, bool allowNonComplete, unsigned csLevel=0);
113
getRootOffset() const
114
{ return 3 * sizeof(unsigned int); }
118
{ return (TNode*)(m_mem+getRootOffset()); }
121
nodeFromOffset(unsigned int offset) const
122
{ return (offset < getRootOffset())?NULL:((TNode*)(m_mem+offset)); }
125
getWordCount(void) const
126
{ return *(unsigned int*)m_mem; }
129
getNodeCount(void) const
130
{ return *(unsigned int*)(m_mem+sizeof(unsigned int)); }
133
getStringOffset(void) const
134
{ return *(unsigned int*)(m_mem+2*sizeof(unsigned int)); }
137
transfer(const TNode* pnode, unsigned s) const
138
{ return nodeFromOffset(pnode->transfer(s)); }
141
transfer(unsigned s) const
142
{ return transfer(getRootNode(), s); }
112
isValid(const TNode* pnode, bool allowNonComplete, unsigned csLevel = 0);
114
unsigned int getRootOffset() const
115
{ return 3 * sizeof(unsigned int); }
117
/** @return the node located getRootOffset() past the start of m_mem. */
const TNode*getRootNode() const
{
    return reinterpret_cast<const TNode *>(m_mem + getRootOffset());
}
120
/**
 * Translates an offset into m_mem to a node pointer.
 *
 * @param offset offset added to m_mem to locate the node
 * @return NULL when offset is smaller than getRootOffset(); otherwise the
 *         address m_mem + offset viewed as a TNode. No upper bound is
 *         checked here.
 */
const TNode*nodeFromOffset(unsigned int offset) const
{
    if (offset < getRootOffset())
        return NULL;
    return reinterpret_cast<const TNode *>(m_mem + offset);
}
123
unsigned int getWordCount(void) const
124
{ return *(unsigned int *) m_mem; }
126
unsigned int getNodeCount(void) const
127
{ return *(unsigned int *) (m_mem + sizeof(unsigned int)); }
129
unsigned int getStringOffset(void) const
130
{ return *(unsigned int *) (m_mem + 2 * sizeof(unsigned int)); }
132
/**
 * Follows one transition out of the given node.
 *
 * @param pnode node to start from; dereferenced without a NULL check
 * @param s     key passed to pnode->transfer()
 * @return the node at the offset reported by pnode->transfer(s), resolved
 *         through nodeFromOffset() (NULL for offsets below the root offset)
 */
inline const TNode*transfer(const TNode* pnode, unsigned s) const
{
    const unsigned int childOffset = pnode->transfer(s);
    return nodeFromOffset(childOffset);
}
135
/**
 * Follows one transition out of the root node.
 *
 * @param s key forwarded to the two-argument transfer()
 * @return whatever transfer(getRootNode(), s) returns
 */
inline const TNode*transfer(unsigned s) const
{
    const TNode *root = getRootNode();
    return transfer(root, s);
}
145
139
getSymbolId(const TWCHAR* wstr);