2
2
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4
4
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6
6
* The contents of this file are subject to the terms of either the GNU Lesser
7
7
* General Public License Version 2.1 only ("LGPL") or the Common Development and
8
8
* Distribution License ("CDDL")(collectively, the "License"). You may not use this
9
9
* file except in compliance with the License. You can obtain a copy of the CDDL at
10
10
* http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12
12
* specific language governing permissions and limitations under the License. When
13
13
* distributing the software, include this License Header Notice in each file and
14
14
* include the full text of the License in the License file as well as the
15
15
* following notice:
17
17
* NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19
19
* For Covered Software in this distribution, this License shall be governed by the
53
53
typedef std::pair<std::string, std::string> CCorrectionPair;
54
54
typedef std::vector<CCorrectionPair> CCorrectionPairVec;
56
CGetCorrectionPairOp () : m_bEnabled(false) {m_correctionPairs.reserve(8);}
58
void setEnable (bool value=true) {m_bEnabled = value;}
59
bool isEnabled () {return m_bEnabled;}
61
void setCorrectionPairs (const string_pairs& pairs)
62
{m_correctionPairs = pairs;}
64
const char * operator () (std::string& pystr, unsigned& matched_len);
56
CGetCorrectionPairOp () : m_bEnabled(false) { m_correctionPairs.reserve(8); }
58
void setEnable(bool value = true) { m_bEnabled = value; }
59
bool isEnabled() { return m_bEnabled; }
61
void setCorrectionPairs(const string_pairs& pairs)
62
{ m_correctionPairs = pairs; }
64
const char * operator ()(std::string& pystr, unsigned& matched_len);
67
CCorrectionPairVec m_correctionPairs;
67
CCorrectionPairVec m_correctionPairs;
71
71
class CGetFuzzySegmentsOp : private CNonCopyable
74
typedef std::map<unsigned, std::pair<unsigned, unsigned> > CInnerFuzzyFinalMap;
74
typedef std::map<unsigned,
75
std::pair<unsigned, unsigned> > CInnerFuzzyFinalMap;
75
76
typedef std::map<unsigned, std::pair<char, unsigned> > CFuzzySyllableMap;
77
CGetFuzzySegmentsOp () : m_bEnabled(false), m_bInnerFuzzyEnabled(false) {_initMaps();}
78
unsigned operator () (IPySegmentor::TSegmentVec&, IPySegmentor::TSegmentVec&, wstring&);
78
CGetFuzzySegmentsOp () : m_bEnabled(false),
79
m_bInnerFuzzyEnabled(false) { _initMaps(); }
80
unsigned operator ()(IPySegmentor::TSegmentVec&,
81
IPySegmentor::TSegmentVec&,
80
void setEnable (bool value=true) {m_bEnabled = value;}
81
void setInnerFuzzyEnable (bool value=true) {m_bInnerFuzzyEnabled = value;}
82
bool isEnabled () {return m_bEnabled;}
84
void setEnable(bool value = true) { m_bEnabled = value; }
85
void setInnerFuzzyEnable(bool value = true) { m_bInnerFuzzyEnabled = value; }
86
bool isEnabled() { return m_bEnabled; }
86
unsigned _invalidateSegments (IPySegmentor::TSegmentVec&, IPySegmentor::TSegment&);
90
unsigned _invalidateSegments(IPySegmentor::TSegmentVec&,
91
IPySegmentor::TSegment&);
89
bool m_bInnerFuzzyEnabled;
90
CInnerFuzzyFinalMap m_fuzzyFinalMap;
91
CFuzzySyllableMap m_fuzzyPreMap;
92
CFuzzySyllableMap m_fuzzyProMap;
94
bool m_bInnerFuzzyEnabled;
95
CInnerFuzzyFinalMap m_fuzzyFinalMap;
96
CFuzzySyllableMap m_fuzzyPreMap;
97
CFuzzySyllableMap m_fuzzyProMap;
95
100
class CQuanpinSegmentor : public IPySegmentor
98
103
CQuanpinSegmentor ();
100
virtual TSegmentVec& getSegments (bool req_aux_segs)
102
if (req_aux_segs && m_pGetFuzzySegmentsOp && m_pGetFuzzySegmentsOp->isEnabled()) {
105
virtual TSegmentVec& getSegments(bool req_aux_segs){
106
if (req_aux_segs && m_pGetFuzzySegmentsOp &&
107
m_pGetFuzzySegmentsOp->isEnabled()) {
103
108
m_merged_segs.clear();
104
std::merge (m_segs.begin(), m_segs.end(),
105
m_fuzzy_segs.begin(), m_fuzzy_segs.end(),
106
back_inserter(m_merged_segs));
109
std::merge(m_segs.begin(), m_segs.end(),
110
m_fuzzy_segs.begin(), m_fuzzy_segs.end(),
111
back_inserter(m_merged_segs));
107
112
return m_merged_segs;
113
virtual const wstring& getInputBuffer () {return m_inputBuf;}
115
virtual const char* getSylSeps () {return "'";}
117
virtual unsigned push (unsigned ch);
118
virtual unsigned pop ();
119
virtual unsigned insertAt (unsigned idx, unsigned ch);
120
virtual unsigned deleteAt (unsigned idx, bool backward=true);
121
virtual unsigned clear (unsigned from=0);
123
virtual unsigned updatedFrom () {return m_updatedFrom;}
118
virtual const wstring& getInputBuffer() { return m_inputBuf; }
120
virtual const char* getSylSeps() { return "'"; }
122
virtual unsigned push(unsigned ch);
123
virtual unsigned pop();
124
virtual unsigned insertAt(unsigned idx, unsigned ch);
125
virtual unsigned deleteAt(unsigned idx, bool backward = true);
126
virtual unsigned clear(unsigned from = 0);
128
virtual unsigned updatedFrom() { return m_updatedFrom; }
125
130
bool load(const char * pyTrieFileName);
127
void setGetFuzzySyllablesOp (CGetFuzzySyllablesOp<CPinyinData> *op) {m_pGetFuzzySyllablesOp = op;}
128
void setGetCorrectionPairOp (CGetCorrectionPairOp *op) {m_pGetCorrectionPairOp = op;}
129
void setGetFuzzySegmentsOp (CGetFuzzySegmentsOp *op) {m_pGetFuzzySegmentsOp = op;}
132
void setGetFuzzySyllablesOp(CGetFuzzySyllablesOp<CPinyinData> *op) {
133
m_pGetFuzzySyllablesOp = op; }
134
void setGetCorrectionPairOp(CGetCorrectionPairOp *op) {
135
m_pGetCorrectionPairOp = op; }
136
void setGetFuzzySegmentsOp(CGetFuzzySegmentsOp *op) {
137
m_pGetFuzzySegmentsOp = op; }
132
inline unsigned _push (unsigned ch);
133
inline unsigned _clear (unsigned from);
134
inline void _addFuzzySyllables (TSegment &seg);
135
inline unsigned _updateWith (const std::string& new_pystr, unsigned from = UINT_MAX);
136
inline void _locateSegment (unsigned idx, unsigned &strIdx, unsigned &segIdx);
140
inline unsigned _push(unsigned ch);
141
inline unsigned _clear(unsigned from);
142
inline void _addFuzzySyllables(TSegment &seg);
143
inline unsigned _updateWith(const std::string& new_pystr,
144
unsigned from = UINT_MAX);
145
inline void _locateSegment(unsigned idx, unsigned &strIdx, unsigned &segIdx);
138
147
CGetFuzzySyllablesOp<CPinyinData> *m_pGetFuzzySyllablesOp;
139
148
CGetCorrectionPairOp *m_pGetCorrectionPairOp;
140
149
CGetFuzzySegmentsOp *m_pGetFuzzySegmentsOp;
142
151
CDATrie<short> m_pytrie;
146
TSegmentVec m_fuzzy_segs;
147
TSegmentVec m_merged_segs;
155
TSegmentVec m_fuzzy_segs;
156
TSegmentVec m_merged_segs;
149
unsigned m_updatedFrom;
158
unsigned m_updatedFrom;
152
161
#endif /* SUNPY_PINYIN_SEG_H */