102
106
json.dump(function_dict, open(dest, 'wb'), indent=4)
108
self.run_kakasi(opts)
110
def run_kakasi(self, opts):
# Regenerate the three pykakasi data files under
# RESOURCES/localization/pykakasi from the dictionaries in KAKASI_PATH:
#   kakasidict.utf8 -> kanwadict2.db       (lines fed to self.parsekdict)
#   itaijidict.utf8 -> itaijidict2.pickle  (via self.mkitaiji)
#   kanadict.utf8   -> kanadict2.pickle    (via self.mkkanadict)
# Each output is rebuilt only when self.newer(dest, src) is true.
# `opts` is not referenced in the visible code.
# NOTE(review): the bare integer lines look like line-number residue from
# the extraction, indentation has been lost, and some original statements
# are absent from this view -- restore from the upstream file before use.
112
src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
113
dest = self.j(self.RESOURCES, 'localization',
114
'pykakasi','kanwadict2.db')
115
base = os.path.dirname(dest)
116
# NOTE(review): the body of this `if` is missing here; presumably it
# creates the `base` directory -- confirm against the original file.
if not os.path.exists(base):
119
if self.newer(dest, src):
120
self.info('\tGenerating Kanwadict')
122
# Every raw line of the source dictionary is handed to parsekdict.
for line in open(src, "r"):
123
self.parsekdict(line)
# NOTE(review): nothing visible here writes `dest` after the loop; a call
# that persists the parsed records (kanwaout?) may be missing -- TODO confirm.
126
src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
127
dest = self.j(self.RESOURCES, 'localization',
128
'pykakasi','itaijidict2.pickle')
130
if self.newer(dest, src):
131
self.info('\tGenerating Itaijidict')
132
self.mkitaiji(src, dest)
134
src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
135
dest = self.j(self.RESOURCES, 'localization',
136
'pykakasi','kanadict2.pickle')
138
if self.newer(dest, src):
139
self.info('\tGenerating kanadict')
140
self.mkkanadict(src, dest)
145
def mkitaiji(self, src, dst):
    """Build the itaiji lookup table from *src* and pickle it to *dst*.

    Every line of *src* is UTF-8 decoded and stripped. Lines starting
    with ';;' (comments) and empty lines are skipped. In the remaining
    lines each backslash-u escape followed by four hex digits is replaced
    by the character it names, and the first character of the result is
    mapped to the second.

    :param src: path of the UTF-8 encoded source dictionary
    :param dst: path of the pickle file to write
    """
    escape = re.compile(r'\\u([0-9a-fA-F]{4})')
    dic = {}
    # Read bytes and decode explicitly; the context manager makes sure the
    # handle is released even if a line fails to decode.
    with open(src, 'rb') as f:
        for line in f:
            line = line.decode("utf-8").strip()
            if not line or line.startswith(';;'):
                continue
            pair = escape.sub(lambda x: unichr(int(x.group(1), 16)), line)
            dic[pair[0]] = pair[1]
    # protocol=-1 selects the highest (binary) pickle protocol, so the
    # output file has to be opened in binary mode.
    with open(dst, 'wb') as f:
        cPickle.dump(dic, f, protocol=-1)
157
def mkkanadict(self, src, dst):
# Read the dictionary at `src` line by line and pickle the resulting
# `dic` object to `dst`.
# NOTE(review): indentation is lost and the bare integer lines look like
# line-number residue from the extraction. The initialisation of `dic`,
# the bodies of the two `if` statements and the statement that stores
# each (alpha, kana) pair in `dic` are not present in this view -- restore
# them from the original file.
159
for line in open(src, "r"):
160
# Each raw line is decoded as UTF-8 and stripped of surrounding whitespace.
line = line.decode("utf-8").strip()
161
if line.startswith(';;'): # skip comment
163
# Matches only the empty string, i.e. a blank line after stripping.
if re.match(r"^$",line):
165
# A data line must hold exactly two fields separated by one space.
(alpha, kana) = line.split(' ')
167
# NOTE(review): protocol=-1 is a binary pickle protocol but the file is
# opened in text mode 'w'; 'wb' is presumably intended -- confirm.
cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
169
def parsekdict(self, line):
# Parse one raw line of the kakasi dictionary and register it through
# self.updaterec(kanji, yomi, tail).
# NOTE(review): indentation is lost and the bare integer lines look like
# line-number residue from the extraction. The bodies of both `if`
# statements, including whatever assigns `tail`, are missing from this
# view -- restore them from the original file.
170
# The raw line is decoded as UTF-8 and stripped of surrounding whitespace.
line = line.decode("utf-8").strip()
171
if line.startswith(';;'): # skip comment
173
# A data line must hold exactly two fields separated by one space.
(yomi, kanji) = line.split(' ')
174
# True when the last character of the reading has a code point no greater
# than that of 'z'.
if ord(yomi[-1:]) <= ord('z'):
179
self.updaterec(kanji, yomi, tail)
181
def updaterec(self, kanji, yomi, tail):
    """Record the reading (*yomi*, *tail*) for *kanji* in self.records.

    self.records is a two-level mapping: the outer key is the code point
    of the first character of *kanji* formatted as at least four lowercase
    hex digits, the inner key is *kanji* itself, and the value is the list
    of (yomi, tail) tuples in insertion order.

    :param kanji: non-empty string whose first character selects the bucket
    :param yomi: reading to store
    :param tail: value stored alongside the reading
    """
    key = "%04x" % ord(kanji[0])
    # setdefault creates the bucket and the per-kanji list on first use,
    # which covers all three cases: new key, new kanji, existing kanji.
    self.records.setdefault(key, {}).setdefault(kanji, []).append((yomi, tail))
194
def kanwaout(self, out):
    """Write self.records to the dbm database at *out*.

    The database is opened with flag 'c'. Each value of self.records is
    serialized with marshal.dumps, passed through compress() and stored
    under its key.

    :param out: path of the database file
    """
    dic = anydbm.open(out, 'c')
    try:
        for (k, v) in self.records.iteritems():
            dic[k] = compress(marshal.dumps(v))
    finally:
        # Always release the database handle, even if serializing one of
        # the records raises.
        dic.close()
105
202
for x in ('scripts', 'recipes', 'ebook-convert-complete'):
106
203
x = self.j(self.RESOURCES, x+'.pickle')
107
204
if os.path.exists(x):
206
kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
207
if os.path.exists(kakasi):
208
shutil.rmtree(kakasi)