2
2
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4
4
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6
6
* The contents of this file are subject to the terms of either the GNU Lesser
7
7
* General Public License Version 2.1 only ("LGPL") or the Common Development and
8
8
* Distribution License ("CDDL")(collectively, the "License"). You may not use this
9
9
* file except in compliance with the License. You can obtain a copy of the CDDL at
10
10
* http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12
12
* specific language governing permissions and limitations under the License. When
13
13
* distributing the software, include this License Header Notice in each file and
14
14
* include the full text of the License in the License file as well as the
15
15
* following notice:
17
17
* NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19
19
* For Covered Software in this distribution, this License shall be governed by the
61
61
unsigned initial : 8;
62
62
unsigned other : 12;
64
TSyllable (unsigned int s=0)
65
{ *((unsigned*)this) = s; }
67
TSyllable (int i, int f, int t)
68
: initial(i), final(f), tone(t), other(0) { }
70
operator unsigned int() const
71
{ return *((unsigned *)this); }
73
bool isFullSyllable () const
76
bool operator == (const TSyllable & syl) const {
77
return (unsigned int )*this == (unsigned int)(syl);
64
TSyllable (unsigned int s = 0)
65
{ *((unsigned *) this) = s; }
67
TSyllable (int i, int f, int t)
68
: tone(t), final(f), initial(i), other(0) { }
70
operator unsigned int() const
71
{ return *((unsigned *) this); }
73
bool isFullSyllable() const
74
{ return final != 0; }
76
bool operator ==(const TSyllable & syl) const {
77
return (unsigned int ) *this == (unsigned int) (syl);
80
bool operator != (const TSyllable & syl) const {
80
bool operator !=(const TSyllable & syl) const {
81
81
return !(*this == syl);
84
bool operator == (const unsigned s) const {
85
return (unsigned int)*this == s;
84
bool operator ==(const unsigned s) const {
85
return (unsigned int) *this == s;
89
89
typedef struct _TPyTabEntry {
94
94
// A sequence of syllables; this is the return type of
// CGetFuzzySyllablesOp::operator().
typedef std::vector<TSyllable> CSyllables;
96
96
template <class PinyinDataPolicy>
97
97
class CGetFuzzySyllablesOp : private CNonCopyable
100
100
// Multimap from a string key to its alternative strings. It is filled by
// initFuzzyMap() and queried with lower_bound()/upper_bound() in operator().
// Keys are presumably pinyin initials/finals as produced by
// PinyinDataPolicy::decodeSyllable -- confirm against the policy class.
typedef std::multimap<const std::string, std::string> CFuzzyMap;
102
102
// Starts with both options switched off. m_bEnableSimplerInitials must be
// initialized here too: isEnabled() and operator() read it, and a bool left
// uninitialized holds an indeterminate value until the setter is called.
CGetFuzzySyllablesOp ()
    : m_bEnableFuzzies(false), m_bEnableSimplerInitials(false) {}
104
// Stores `value` in the m_bEnableFuzzies flag; calling with no argument
// sets it to true.
void setEnableFuzzies (bool value = true)
{
    m_bEnableFuzzies = value;
}
105
// Stores `value` in the m_bEnableSimplerInitials flag; calling with no
// argument sets it to true.
void setEnableSimplerInitials (bool value = true)
{
    m_bEnableSimplerInitials = value;
}
106
bool isEnabled () {return m_bEnableFuzzies || m_bEnableSimplerInitials;}
108
void clearFuzzyMap ()
109
{m_fuzzyMap.clear();}
111
void initFuzzyMap (const string_pairs& fuzzyPairs, bool duplex = true)
113
string_pairs::const_iterator it = fuzzyPairs.begin();
114
string_pairs::const_iterator ite = fuzzyPairs.end();
116
for (; it != ite; ++it) {
117
const std::string i = it->first;
118
const std::string j = it->second;
120
if (m_fuzzyMap.find(i) == m_fuzzyMap.end())
121
m_fuzzyMap.insert (std::pair<const std::string, std::string> (i, j));
123
if (duplex && m_fuzzyMap.find(j) == m_fuzzyMap.end())
124
m_fuzzyMap.insert (std::pair<const std::string, std::string> (j, i));
104
// Stores `value` in the m_bEnableFuzzies flag; calling with no argument
// sets it to true.
void setEnableFuzzies(bool value = true)
{
    m_bEnableFuzzies = value;
}
105
// Stores `value` in the m_bEnableSimplerInitials flag; calling with no
// argument sets it to true.
void setEnableSimplerInitials(bool value = true)
{
    m_bEnableSimplerInitials = value;
}
107
bool isEnabled() { return m_bEnableFuzzies || m_bEnableSimplerInitials; }
110
{ m_fuzzyMap.clear(); }
112
void initFuzzyMap(const string_pairs& fuzzyPairs, bool duplex = true){
113
string_pairs::const_iterator it = fuzzyPairs.begin();
114
string_pairs::const_iterator ite = fuzzyPairs.end();
116
for (; it != ite; ++it) {
117
const std::string i = it->first;
118
const std::string j = it->second;
120
if (m_fuzzyMap.find(i) == m_fuzzyMap.end())
121
m_fuzzyMap.insert(std::pair<const std::string, std::string> (i,
124
if (duplex && m_fuzzyMap.find(j) == m_fuzzyMap.end())
125
m_fuzzyMap.insert(std::pair<const std::string, std::string> (j,
128
CSyllables operator () (TSyllable s)
131
static char buf[128];
134
PinyinDataPolicy::decodeSyllable (s, &i, &f);
136
if (m_bEnableSimplerInitials && !m_bEnableFuzzies && *f != '\0')
139
std::vector<const char *> iset;
140
std::vector<const char *> fset;
145
CFuzzyMap::const_iterator it;
146
for (it = m_fuzzyMap.lower_bound(i); it != m_fuzzyMap.upper_bound(i); ++it)
147
iset.push_back ((it->second).c_str());
149
for (it = m_fuzzyMap.lower_bound(f); it != m_fuzzyMap.upper_bound(f); ++it)
150
fset.push_back ((it->second).c_str());
152
std::vector<const char *>::const_iterator iset_it = iset.begin();
153
for (; iset_it != iset.end(); ++iset_it) {
154
std::vector<const char *>::const_iterator fset_it = fset.begin();
155
for (; fset_it != fset.end(); ++ fset_it) {
156
snprintf (buf, sizeof(buf), "%s%s", *iset_it, *fset_it);
157
TSyllable ts = PinyinDataPolicy::encodeSyllable (buf);
130
CSyllables operator ()(TSyllable s){
132
static char buf[128];
135
PinyinDataPolicy::decodeSyllable(s, &i, &f);
137
if (m_bEnableSimplerInitials && !m_bEnableFuzzies && *f != '\0')
140
std::vector<const char *> iset;
141
std::vector<const char *> fset;
146
CFuzzyMap::const_iterator it;
147
for (it = m_fuzzyMap.lower_bound(i);
148
it != m_fuzzyMap.upper_bound(i);
150
iset.push_back((it->second).c_str());
152
for (it = m_fuzzyMap.lower_bound(f);
153
it != m_fuzzyMap.upper_bound(f);
155
fset.push_back((it->second).c_str());
157
std::vector<const char *>::const_iterator iset_it = iset.begin();
158
for (; iset_it != iset.end(); ++iset_it) {
159
std::vector<const char *>::const_iterator fset_it = fset.begin();
160
for (; fset_it != fset.end(); ++fset_it) {
161
snprintf(buf, sizeof(buf), "%s%s", *iset_it, *fset_it);
162
TSyllable ts = PinyinDataPolicy::encodeSyllable(buf);
167
CFuzzyMap m_fuzzyMap;
168
bool m_bEnableFuzzies;
169
bool m_bEnableSimplerInitials;
172
CFuzzyMap m_fuzzyMap;
173
bool m_bEnableFuzzies;
174
bool m_bEnableSimplerInitials;