4
#===============================================================================
5
# Author: Walapa Muangjeen
6
#===============================================================================
9
#4.0.264 - version update
14
# aligner: extracts "dictionary" pairs (a quoted term followed by a
# parenthesised term) from buffered text lines and removes the parenthesised
# part from the text.
# NOTE(review): this extract has lost its indentation, is interleaved with bare
# line-number artifacts, and skips some original lines. The names `cf`, `ktgt`,
# `ksrc` and `found` are bound somewhere outside the visible lines.
class aligner(object):
23
# open(): fills the per-language lookup tables that __filter reads.
# `parent` and `cfg` are not used in the visible lines.
def open(self,parent,cfg):
26
# zh_hk pattern: text inside curly quotes, an optional space, then a
# parenthesised group. Groups: 1 = quoted text, 2 = optional space,
# 3 = text inside the parentheses.
self.regex['zh_hk'] = re.compile(u"“(.*?)”( |)\((.*?)\)")
27
# en pattern: same three groups, with straight double quotes.
self.regex['en'] = re.compile(u"\"(.*?)\"( |)\((.*?)\)")
28
# Regex fragments placed before and after the parenthesised text when it is
# stripped in __filter: a[0] matches " (" or "(", a[1] matches ")*", ")" or ") ".
self.a = ( u'( \(|\()', u'(\)\*|\)|\) )' )
30
# self.b[lang] is a (pattern, replacement) pair handed to re.sub in __filter.
# For zh_hk it turns the separator between two quoted terms from 、 into 和.
self.b['zh_hk'] = ( u'”、“', u'”和“' )
31
# For en both parts are empty, so the substitution leaves the text unchanged.
self.b['en'] = ( u'', u'' )
33
# self.z[lang]: index 0 is the separator used to split the quoted text and
# index 2 the separator used to split the parenthesised text (see __filter).
# Index 1 is not read in the visible lines.
# NOTE(review): presumably the parenthesised text is in the other language,
# which would explain ', ' for zh_hk and '、' for en -- confirm.
self.z['zh_hk'] = ( u'”、“', '', u', ' )
34
self.z['en'] = ( u'" and "','' , u'、' )
40
# NOTE(review): the `def` line of the method that owns the statements below is
# not visible in this extract, and neither is the code that binds ktgt/ksrc.
# Put the target key's srclang value into the rdrlang slot of ksrc.
ksrc[cf.rdrlang] = ktgt[cf.srclang]
43
# Register four output keys in self.newkey, one per (language, language) pair.
# Each key is the tail of a filelist (entries after index cf.stage) whose
# `kind` entry has '-dictionary' appended.
# (srclang, srclang): derived from the ksrc filelist.
newfilelist = list(self.p.cfoutput[ksrc]['filelist'])
44
newfilelist[cf.kind] = newfilelist[cf.kind] + '-dictionary'
45
self.newkey[(ktgt[cf.srclang],ktgt[cf.srclang])] = tuple(newfilelist[cf.stage+1:])
47
# (srclang, rdrlang): derived from the ktgt filelist.
newfilelist = list(self.p.cfoutput[ktgt]['filelist'])
48
newfilelist[cf.kind] = newfilelist[cf.kind] + '-dictionary'
49
self.newkey[(ktgt[cf.srclang],ktgt[cf.rdrlang])] = tuple(newfilelist[cf.stage+1:])
51
# (rdrlang, rdrlang): ktgt filelist with its srclang slot replaced by the
# rdrlang value.
newfilelist = list(self.p.cfoutput[ktgt]['filelist'])
52
newfilelist[cf.kind] = newfilelist[cf.kind] + '-dictionary'
53
newfilelist[cf.srclang] = ktgt[cf.rdrlang]
54
self.newkey[(ktgt[cf.rdrlang],ktgt[cf.rdrlang])] = tuple(newfilelist[cf.stage+1:])
56
# (rdrlang, srclang): ksrc filelist with its srclang slot replaced by the
# rdrlang value.
newfilelist = list(self.p.cfoutput[ksrc]['filelist'])
57
newfilelist[cf.kind] = newfilelist[cf.kind] + '-dictionary'
58
newfilelist[cf.srclang] = ktgt[cf.rdrlang]
59
self.newkey[(ktgt[cf.rdrlang],ktgt[cf.srclang])] = tuple(newfilelist[cf.stage+1:])
61
# Run every buffered line through __filter and store the result back in place.
# The loop length comes from the ktgt buffer; the ksrc buffer is indexed with
# the same linenum.
# NOTE(review): assumes both buffers have the same number of lines -- confirm.
for linenum in range(len(self.p.cfoutput[ktgt]['tempbuff'])):
62
self.p.cfoutput[ktgt]['tempbuff'][linenum] = self.__filter(self.p.cfoutput[ktgt]['tempbuff'][linenum], linenum, ktgt, ktgt[cf.rdrlang])
63
self.p.cfoutput[ksrc]['tempbuff'][linenum] = self.__filter(self.p.cfoutput[ksrc]['tempbuff'][linenum], linenum, ktgt, ktgt[cf.srclang])
65
# Hand the extract configuration to the parent's writer, then flush it.
self.p.writer.run(self.p.cfextract)
66
self.p.writer.flush(self.p.cfextract)
74
# __filter(): collects the term pairs found in `text` into the dictionary
# buffers and strips the parenthesised parts from `text`.
#   text    -- one buffered line
#   linenum -- index of that line (not used in the visible lines)
#   ktgt    -- key whose srclang/rdrlang entries select the output buffers
#   lang    -- language code selecting the regex and separator tables
# NOTE(review): no `return` is visible in this extract, but the caller stores
# the result back into the buffer, so presumably `return text` follows -- confirm.
def __filter(self,text,linenum,ktgt,lang):
75
'''extract dictionary'''
76
# With three groups in the pattern, findall yields one 3-tuple per match.
results = re.findall(self.regex[lang], text)
78
# NOTE(review): the line that binds `found` (presumably `for found in
# results:`) is missing from this extract.
# found[0] is the quoted text and found[2] the parenthesised text. Both are
# split on the language's separators and, presumably as nested loops, every
# left piece is paired with every right piece.
for lefttxt in found[0].split(self.z[lang][0]):
79
for righttxt in found[2].split(self.z[lang][2]):
80
# Source-language line: left piece goes to (src, src), right to (src, rdr).
if lang == ktgt[cf.srclang]:
81
self.p.cfoutput[self.newkey[(ktgt[cf.srclang],ktgt[cf.srclang])]]['tempbuff'].append(lefttxt)
82
self.p.cfoutput[self.newkey[(ktgt[cf.srclang],ktgt[cf.rdrlang])]]['tempbuff'].append(righttxt)
84
# NOTE(review): an `else:` line is presumably missing here. These two appends
# mirror the ones above: right piece to (rdr, src), left piece to (rdr, rdr).
self.p.cfoutput[self.newkey[(ktgt[cf.rdrlang],ktgt[cf.srclang])]]['tempbuff'].append(righttxt)
85
self.p.cfoutput[self.newkey[(ktgt[cf.rdrlang],ktgt[cf.rdrlang])]]['tempbuff'].append(lefttxt)
86
# Strip the parenthesised group, with its brackets, from the text.
# NOTE(review): found[2] is spliced into the pattern unescaped, so any regex
# metacharacter in the matched text is read as pattern syntax; re.escape
# would avoid that.
text = re.sub(self.a[0] + found[2] + self.a[1], '', text)
87
# Apply the language's separator substitution (a no-op for 'en').
text = re.sub(self.b[lang][0], self.b[lang][1], text)
91
'''Command prompt help.'''
92
return "\n%s\n\tUsage:\n\tfrom %s import filtergraph\n"%(
93
os.path.basename(sys.argv[0]),
94
os.path.splitext(os.path.basename(sys.argv[0]))[0]
97
# License notice for CorpusFiltergraph v4.0 (GNU LGPL, version 3 or later),
# kept as a unicode string.
# NOTE(review): the bare numbers between the lines of this literal look like
# line-number artifacts of the extraction, but as the file stands they are part
# of the string -- confirm against the original file before cleaning them up.
licensetxt=u'''CorpusFiltergraph™ v4.0
98
Copyright © 2010-2012 Precision Translation Tools Co., Ltd.
100
This program is free software: you can redistribute it and/or modify
101
it under the terms of the GNU Lesser General Public License as published by
102
the Free Software Foundation, either version 3 of the License, or
103
(at your option) any later version.
105
This program is distributed in the hope that it will be useful,
106
but WITHOUT ANY WARRANTY; without even the implied warranty of
107
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
108
GNU Lesser General Public License for more details.
110
You should have received a copy of the GNU Lesser General Public License
111
along with this program. If not, see http://www.gnu.org/licenses/.
113
For more information, please contact Precision Translation Tools Co., Ltd.
114
at: http://www.precisiontranslationtools.com'''
116
# Script entry point: when the module is run directly, write the text returned
# by usage(), UTF-8 encoded, followed by a newline to stdout.
# NOTE(review): `.encode('utf8') + '\n'` joins an encoded byte string with a
# plain string literal, which only works on Python 2 -- confirm the target
# interpreter.
if __name__ == "__main__":
119
sys.stdout.write(usage().encode('utf8')+'\n')