4
import sqlite3 as sqlite
5
from pinyin_data import valid_syllables, decode_syllable, initials, finals
7
def get_userdict_path ():
    """Return the path of SunPinyin's user dictionary for the current user.

    On macOS the fixed location under ``$HOME/Library/Application Support``
    is returned without checking that it exists.  On other platforms the
    known SunPinyin data directories are probed in order and the
    ``userdict`` file inside the first existing one is returned.

    Raises:
        RuntimeError: if ``HOME`` is not set, or if none of the known data
            directories exists.
    """
    homedir = os.environ.get("HOME")
    if homedir is None:
        # Fail with a clear message instead of a TypeError from None + str.
        raise RuntimeError("Can not detect sunpinyin's userdict: HOME is not set!")

    if sys.platform == "darwin":
        return homedir + "/Library/Application Support/SunPinyin/userdict"

    # FIXME: not sure how to get the ibus version or wrapper type (xim or ibus)
    # Candidates are tried in this order; the first existing directory wins.
    candidates = (
        "/.cache/ibus/sunpinyin",
        "/.ibus/sunpinyin",
        "/.sunpinyin",
    )
    for datadir in candidates:
        if os.path.exists(homedir + datadir):
            return homedir + datadir + "/userdict"

    # String exceptions are not valid (TypeError on Python >= 2.6), so raise
    # a real exception class carrying the same message.
    raise RuntimeError("Can not detect sunpinyin's userdict!")
25
# Return the path of SunPinyin's system dictionary file (pydict_sc.bin).
# NOTE(review): the bare integers between statements look like leftover line
# numbers from an extraction step, and the original indentation has been lost.
def get_sysdict_path ():
26
# On macOS, look for the dictionary inside a copy of SunPinyin.app under $HOME.
if sys.platform == "darwin":
27
homedir = os.environ.get("HOME")
28
sysdict_path = "/Library/Input Methods/SunPinyin.app/Contents/Resources/pydict_sc.bin"
29
if os.path.exists (homedir + sysdict_path):
30
return homedir + sysdict_path
# NOTE(review): the numbering jumps from 30 to 34, so statements appear to be
# missing here; what happens on macOS when the $HOME copy does not exist is
# not visible in this view -- confirm against the full file.
34
# Default location returned when no earlier return was taken.
return "/usr/lib/sunpinyin/data/pydict_sc.bin"
36
# Read words out of the binary system dictionary located by get_sysdict_path().
# NOTE(review): the bare integers are leftover line numbers and the numbering
# has gaps (38 -> 41 -> 47), so several statements, the loop body and the
# return value are not visible in this view.
def load_system_dict ():
37
sysdict_path = get_sysdict_path ()
38
# Opened in binary mode; no matching close() is visible in this view.
f = open (sysdict_path, "rb")
41
# Read 4 bytes and unpack them as one unsigned int ('I', native byte order).
word_offset = struct.unpack ('I', f.read(4))[0]
47
# `str` is presumably a local holding bytes read from the file (it shadows the
# builtin) -- its assignment is not shown; TODO confirm. The data is decoded
# as UTF-32 and split on NUL characters, one piece per iteration.
for w in str.decode('UTF-32').split('\0'):
54
# Import (pinyin, word) records into the SunPinyin user dictionary database.
#
# Parameters:
#   records       -- iterable of (pystr, utf8str) pairs; pystr is split on "'"
#                    and each piece is looked up in valid_syllables.
#   userdict_path -- path of the sqlite database; when empty, the result of
#                    get_userdict_path() is used instead.
#
# The visible SQL creates a `dict` table if needed (id, len, i0..i5, f0..f5,
# utf8str with a UNIQUE constraint) and inserts one parameterized row per record.
#
# NOTE(review): the bare integers are leftover line numbers, indentation has
# been lost, and the numbering has many gaps. The `sqlstring` assignments, the
# guards around the print statements and whatever follows each check are not
# visible here, so the comments below describe only what the remaining lines show.
def import_to_sunpinyin_user_dict (records, userdict_path=''):
55
userdict_path = userdict_path if userdict_path else get_userdict_path()
56
db = sqlite.connect (userdict_path)
58
# Loaded once, before the loop; used below for the `utf8str in sysdict` test.
sysdict = load_system_dict()
61
CREATE TABLE IF NOT EXISTS dict(
62
id INTEGER PRIMARY KEY, len INTEGER,
63
i0 INTEGER, i1 INTEGER, i2 INTEGER, i3 INTEGER, i4 INTEGER, i5 INTEGER,
64
f0 INTEGER, f1 INTEGER, f2 INTEGER, f3 INTEGER, f4 INTEGER, f5 INTEGER,
65
utf8str TEXT, UNIQUE (utf8str));
67
db.executescript (sqlstring)
71
for (pystr, utf8str) in records:
73
# Raises KeyError for a piece that is not a key of valid_syllables; the
# message below presumably belongs to a handler for that case -- TODO confirm.
syllables = [valid_syllables[s] for s in pystr.split("'")]
75
print "[%s] has un-recognized syllables, ignoring this record!" % pystr
78
# Only entries of 2 to 6 syllables pass this check (the table has slots 0..5).
if len (syllables) < 2 or len (syllables) > 6:
79
print "[%s] is too long or too short for sunpinyin userdict" % utf8str
82
# Tests whether the word is already in the system dictionary; the action
# taken is not visible in this view.
if utf8str in sysdict:
83
#print "[%s] is already in sunpinyin's sysdict" % utf8str
87
# First slot of the row is the syllable count (the `len` column of the INSERT).
record[0] = len (syllables)
92
# Split an encoded syllable `s`: bits 12 and up go to `i`, bits 4..11 to `f`.
# Presumably these are the initial and final stored in the iN/fN columns --
# TODO confirm.
i, f = s>>12, (s&0x00ff0)>>4
100
INSERT INTO dict (len, i0, f0, i1, f1, i2, f2, i3, f3, i4, f4, i5, f5, utf8str)
101
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
104
db.execute (sqlstring, record)
105
#print "[%s] is imported into sunpinyin's userdict" % utf8str
108
# Something happens every time batch_count reaches 100 (presumably a commit
# and counter reset); the body is not visible -- TODO confirm.
if batch_count == 100:
113
#print "[%s] is already in sunpinyin's userdict" % utf8str
119
# Print the entries of the SunPinyin user dictionary database to stdout.
#
# Parameters:
#   userdict_path -- path of the sqlite database; when empty, the result of
#                    get_userdict_path() is used instead.
#
# NOTE(review): the bare integers are leftover line numbers, indentation has
# been lost, and the numbering jumps from 126 to 132, so the statements that
# derive `i`, `f`, `l`, `str` and `id` from each record are not visible here.
def export_sunpinyin_user_dict (userdict_path=''):
120
userdict_path = userdict_path if userdict_path else get_userdict_path()
121
db = sqlite.connect (userdict_path)
123
sqlstring = "SELECT * FROM dict"
124
# fetchall() already returns a list; list() makes another copy of it.
result = list (db.execute (sqlstring).fetchall ())
126
for record in result:
132
# Rebuild each syllable by concatenating the initial and final looked up by
# index, for the first `l` positions.
syls = [initials[i[x]] + finals[f[x]] for x in range(l)]
133
# Python 2 print statement: the UTF-8 encoded word, `id`, then the syllables
# joined with "'".
print str.encode ('UTF-8'), id, "'".join(syls)
135
# Script entry point: when run directly (not imported), dump the user
# dictionary found at the auto-detected location.
if __name__ == "__main__":
    export_sunpinyin_user_dict()