1
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3
# Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
5
# The contents of this file are subject to the terms of either the GNU Lesser
6
# General Public License Version 2.1 only ("LGPL") or the Common Development and
7
# Distribution License ("CDDL")(collectively, the "License"). You may not use this
8
# file except in compliance with the License. You can obtain a copy of the CDDL at
9
# http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
10
# http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11
# specific language governing permissions and limitations under the License. When
12
# distributing the software, include this License Header Notice in each file and
13
# include the full text of the License in the License file as well as the
16
# NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
18
# For Covered Software in this distribution, this License shall be governed by the
19
# laws of the State of California (excluding conflict-of-law provisions).
20
# Any litigation relating to this License shall be subject to the jurisdiction of
21
# the Federal Courts of the Northern District of California and the state courts
22
# of the State of California, with venue lying in Santa Clara County, California.
26
# If you wish your version of this file to be governed by only the CDDL or only
27
# the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
28
# include this software in this distribution under the [CDDL or LGPL Version 2.1]
29
# license." If you don't indicate a single choice of license, a recipient has the
30
# option to distribute your version of this file under either the CDDL or the LGPL
31
# Version 2.1, or to extend the choice of license to its licensees as provided
32
# above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
33
# Version 2 license, then the option applies only if the new code is made subject
34
# to such option by the copyright holder.
36
# Declarations from the CPython C API that this module uses to convert
# between Python unicode objects and C wide-character strings.
cdef extern from "Python.h":
    # Only ever handled through a pointer in this module, so it is declared
    # as an opaque struct with no members.
    ctypedef struct PyUnicodeObject:
        pass

    ctypedef unsigned wchar_t
    # The quoted C name makes the generated code spell the type as
    # "const wchar_t *".
    ctypedef wchar_t * const_wchar_t_ptr "const wchar_t *"

    # Build a Python unicode object from a wide-character buffer and length.
    object PyUnicode_FromWideChar (wchar_t *, Py_ssize_t)
    # Copy a unicode object's characters into a caller-supplied wchar_t
    # buffer; the last argument is a count of wchar_t elements.
    Py_ssize_t PyUnicode_AsWideChar (PyUnicodeObject *, wchar_t *, Py_ssize_t)
44
# Wide-character type and helper declared in "portability.h".
cdef extern from "portability.h":
    # Character type of the strings handed out and accepted by the trie.
    ctypedef unsigned TWCHAR
    # The quoted C name makes the generated code spell the type as
    # "const TWCHAR *".
    ctypedef TWCHAR *const_TWCHAR_ptr "const TWCHAR *"

    # Length helper; this module passes its result as the character count
    # when converting a TWCHAR string to a Python unicode object.
    unsigned WCSLEN(const_TWCHAR_ptr ws)
49
# Declarations for the C++ trie types and functions provided by "pytrie.h".
# NOTE(review): the member lists of the struct declarations below are not
# visible in this listing. Other code in this file reads m_id, m_bSeen,
# m_cost, m_len and m_csLevel from CPinyinTrie_TWord, reads m_nWordId from
# CPinyinTrie_TNode, and calls getWordCount() on CPinyinTrie -- confirm that
# all of these are declared here.
cdef extern from "pytrie.h":
50
# Per-word record; maps to the nested C++ type CPinyinTrie::TWordIdInfo.
ctypedef struct CPinyinTrie_TWord "CPinyinTrie::TWordIdInfo":
57
# Trie node; maps to the nested C++ type CPinyinTrie::TNode.
ctypedef struct CPinyinTrie_TNode "CPinyinTrie::TNode":
59
# Pointer to the first of this node's word records.
CPinyinTrie_TWord * getWordIdPtr ()
61
# The trie itself; the methods below are called through a CPinyinTrie pointer.
ctypedef struct CPinyinTrie "CPinyinTrie":
62
bint load(char *filename)
65
CPinyinTrie_TNode * getRootNode ()
66
# Follow one transition from a node; the second argument is the syllable.
CPinyinTrie_TNode * transfer (CPinyinTrie_TNode *, unsigned)
67
# C++ operator[] exposed under the Cython-side name "getitem".
const_TWCHAR_ptr getitem "operator []" (unsigned)
68
int getSymbolId (const_TWCHAR_ptr)
69
bint isValid (CPinyinTrie_TNode*, bint, unsigned)
71
# Thin aliases for the C++ "new CPinyinTrie" and "delete" expressions, so the
# object can be created and destroyed from Cython code.
CPinyinTrie *new_CPinyinTrie "new CPinyinTrie" ()
72
void del_CPinyinTrie "delete" (CPinyinTrie *pytrie)
76
# NOTE(review): the enclosing class header is not visible in this listing;
# presumably this is the WordInfo class constructed elsewhere in this file
# with five positional arguments -- confirm.
# Integer fields exposed to Python as readable and writable attributes.
cdef public int wid, cost, length, charset_level
78
# Accepts the word id plus four optional fields; every field except wid has
# a default value.
# NOTE(review): only the charset_level assignment is visible here; the
# assignments of the remaining fields are not shown in this listing.
def __cinit__ (self, wid, seen=True, cost=0, length=0, charset_level=0):
83
self.charset_level = charset_level
86
# Renders the five fields as a "name=value" list.
# NOTE(review): the def that owns this return is not visible, and self.seen
# is read here without a visible declaration -- confirm both.
return "wid=%d, seen=%d, cost=%d, length=%d, charset_level=%d" % \
87
(self.wid, self.seen, self.cost, self.length, self.charset_level)
89
# Python-visible handle holding a raw pointer to a C++ trie node.
cdef class PinyinTrieNode:
90
# Raw node pointer; it is not set by any code visible in this class and is
# assigned from outside after construction.
cdef CPinyinTrie_TNode *pnode
94
# NOTE(review): the def line that owns the statements below, the
# initialisation of `words`, and any return statement are not visible in
# this listing.
# Fetch the node's array of word records.
cdef CPinyinTrie_TWord *p= <CPinyinTrie_TWord*> self.pnode.getWordIdPtr ()
95
# The node's m_nWordId field is used as the number of records to read.
for i in xrange (self.pnode.m_nWordId):
96
# Copy each C record into a WordInfo object and collect it.
words.append (WordInfo(p[i].m_id, p[i].m_bSeen, p[i].m_cost, p[i].m_len, p[i].m_csLevel))
99
cdef class PinyinTrie:
    """Python wrapper around a heap-allocated C++ ``CPinyinTrie``.

    Nodes are handed to Python as ``PinyinTrieNode`` objects that carry a raw
    pointer into the trie.
    """
    cdef CPinyinTrie *thisptr

    def __cinit__ (self):
        # Allocate the wrapped C++ object; it is released in __dealloc__.
        self.thisptr = new_CPinyinTrie ()

    def __dealloc__ (self):
        del_CPinyinTrie (self.thisptr)

    def load (self, fname):
        """Call the C++ ``load`` with *fname* and return its boolean result."""
        return self.thisptr.load (fname)

    def get_word_count (self):
        """Return the value of the C++ ``getWordCount()``."""
        return self.thisptr.getWordCount()

    def get_root_node (self):
        """Return a ``PinyinTrieNode`` wrapping the trie's root node."""
        cdef CPinyinTrie_TNode *pnode = <CPinyinTrie_TNode*> self.thisptr.getRootNode ()
        cdef PinyinTrieNode root = PinyinTrieNode ()
        root.pnode = pnode
        # Hand the wrapper back to the caller; without this the method
        # would always return None.
        return root

    def transfer (self, node, syllable):
        """Follow the transition for *syllable* starting at *node*.

        *node* must be a ``PinyinTrieNode``; the cast below is unchecked.
        Returns a new ``PinyinTrieNode`` for the target node, or ``None``
        when the C++ ``transfer`` yields a NULL pointer.
        """
        cdef CPinyinTrie_TNode *pnode = (<PinyinTrieNode>node).pnode
        cdef PinyinTrieNode target
        pnode = <CPinyinTrie_TNode*> self.thisptr.transfer (pnode, <int>syllable)
        # Never wrap a NULL pointer: any later access through the wrapper
        # would dereference it.
        if pnode == NULL:
            return None
        target = PinyinTrieNode ()
        target.pnode = pnode
        return target

    def __getitem__ (self, idx):
        # Out-of-range indices yield an empty string rather than raising.
        cdef const_TWCHAR_ptr cwstr
        if idx<0 or idx>=self.thisptr.getWordCount(): return ''
        cwstr = self.thisptr.getitem (idx)
        return PyUnicode_FromWideChar (<const_wchar_t_ptr>cwstr, WCSLEN(cwstr))

    def get_symbol_id (self, symbol):
        """Return the C++ ``getSymbolId`` result for a one-character string.

        Returns 0 when *symbol* is not exactly one character long.
        Raises ``TypeError`` when *symbol* is not a unicode object, because
        the pointer cast below would otherwise reinterpret unrelated memory.
        """
        cdef wchar_t buf[2]
        if not isinstance (symbol, unicode):
            raise TypeError ("symbol must be a unicode string")
        if len (symbol) != 1: return 0
        # Zero the buffer so the string handed to getSymbolId is always
        # NUL-terminated, whatever PyUnicode_AsWideChar writes.
        buf[0] = 0
        buf[1] = 0
        # The size argument counts wchar_t elements, not bytes; copying at
        # most one character keeps the terminator in buf[1] intact.
        PyUnicode_AsWideChar (<PyUnicodeObject*> symbol, buf, 1)
        return self.thisptr.getSymbolId (<const_TWCHAR_ptr> buf)

    def is_valid (self, node, allowNonComplete=True, csLevel=0):
        """Return the C++ ``isValid`` result for *node*.

        *node* must be a ``PinyinTrieNode``; the cast below is unchecked.
        *allowNonComplete* and *csLevel* are passed through unchanged.
        """
        cdef CPinyinTrie_TNode *pnode = (<PinyinTrieNode>node).pnode
        return self.thisptr.isValid (pnode, allowNonComplete, csLevel)