4
#===============================================================================
5
# Author: Walapa Muangjeen
6
#===============================================================================
9
#4.0.264 - version update
17
# Module logger, named "<script stem>.manager.filtergraph.<module name>",
# where the script stem is the basename of sys.argv[0] without its extension.
logger = logging.getLogger(
    '%s.manager.filtergraph.%s' % (
        os.path.splitext(os.path.basename(sys.argv[0]))[0],
        __name__
    )
)
39
def open(self,parent,cfg):
    '''Validate the filter configuration and copy its settings onto the instance.

    Reads the 'form', 'exceptions', 'encoding', 'inputfile' and 'outputfile'
    keys of cfg. Problems are not raised: each one is appended to
    self.errors as [module name, 'invalid', message] and logged as a warning,
    and the remaining settings are still applied.

    parent -- not referenced by the statements visible in this method.
    cfg    -- mapping of option name to value for this filter.
    '''
    valid_forms = ('NFC','NFD','NFKC','NFKD')
    if cfg['form'] not in valid_forms:
        # The quote escapes in the previous message were misplaced ('\\NFC\\'),
        # which printed a stray backslash and is a malformed \\N escape
        # (syntax error) under Python 3.
        self.errors.append([__name__,'invalid',"%s is invalid. \"form=\" must be 'NFC', 'NFD', 'NFKC', or 'NFKD'"%(cfg['form'],)])
        logger.warning('%s\t%s',*self.errors[-1][1:])

    # NOTE(review): the normalization lines elsewhere in this file read
    # self.exception (singular) -- confirm which attribute name is intended.
    self.exceptions = cfg['exceptions']
    self.form = cfg['form']

    # Any spelling that contains "utf8" once hyphens are removed is collapsed
    # to the codec name 'utf8'; other encodings are passed through unchanged.
    self.encoding = 'utf8' if 'utf8' in cfg['encoding'].lower().replace('-','') else cfg['encoding']

    # Expand the literal %(rootfolder)s placeholder. An empty setting keeps
    # whatever value the attribute already holds.
    self.inputfile = cfg['inputfile'].replace('%(rootfolder)s',self.p.rootfolder) if cfg['inputfile'] else self.inputfile
    self.outputfile = cfg['outputfile'].replace('%(rootfolder)s',self.p.rootfolder) if cfg['outputfile'] else self.outputfile

    # Input and output files must be given together or not at all.
    if bool(self.inputfile) != bool(self.outputfile):
        if self.inputfile:
            problem = 'inputfile=%s without outputfile= value'%(cfg['inputfile'],)
        else:
            # Previously this case was also reported as a missing outputfile.
            problem = 'outputfile=%s without inputfile= value'%(cfg['outputfile'],)
        self.errors.append([__name__,'invalid','[%s] %s'%(__name__,problem)])
        logger.warning('%s\t%s',*self.errors[-1][1:])
55
# NOTE(review): fragment -- the enclosing def, loop and if/else header lines
# are not visible in this view, so nesting cannot be confirmed from here.
skipclose = not self.inputfile
56
# Returns immediately when an input file is configured.
if self.inputfile: return
59
# Element-wise variant: '\b'.join(line).split('\b') yields the elements of
# line one at a time (assuming line contains no backspace). Elements found in
# self.exception are kept as-is; every other element is normalized on its own.
# NOTE(review): open() stores cfg['exceptions'] as self.exceptions (plural),
# but this line reads self.exception -- confirm which name is intended.
# NOTE(review): normalizing one element at a time cannot compose adjacent
# characters (e.g. base + combining mark under NFC) -- confirm this is intended.
self.p.cfoutput[k]['tempbuff'] = [u''.join([ch if ch in self.exception else unicodedata.normalize(self.form,ch) for ch in '\b'.join(line).split('\b')]) for line in self.p.cfoutput[k]['tempbuff']]
61
# Whole-line variant: each buffered line is converted with unicode() and
# normalized to self.form in a single call.
self.p.cfoutput[k]['tempbuff'] = [unicodedata.normalize(self.form,unicode(line)) for line in self.p.cfoutput[k]['tempbuff']]
71
# NOTE(review): fragment -- the enclosing def, try, loop and else header lines
# are not visible in this view, so nesting cannot be confirmed from here.
# Record and log an error when the configured input file does not exist.
if not os.path.exists(self.inputfile):
72
self.errors.append([__name__,'missing','[%s] %s'%(__name__,self.inputfile)])
73
logger.error('%s\t%s',*self.errors[-1][1:])
78
# Create the directory that will contain the output file.
# NOTE(review): os.makedirs raises OSError when the directory already exists;
# the guard around this call, if any, is not visible here -- confirm.
os.makedirs(os.path.dirname(self.outputfile))
81
# Log errno, strerror and filename of the caught exception e, with traceback.
logger.exception('%s\t%s, %s, %s',*['failed',e.errno,e.strerror,e.filename,])
84
# open input and output files
86
# When the output path equals the input path, a temporary file created in
# self.p.tempdir is used as the write target instead.
if out == self.inputfile:
88
# NOTE(review): mkstemp returns an already-open OS-level descriptor (fd); no
# close of fd is visible in this view -- confirm it is closed to avoid a leak.
fd,out = tempfile.mkstemp(suffix='.tmp', prefix='~', dir=self.p.tempdir)
91
# Both files are opened through codecs with self.encoding.
o = codecs.open(out,'w',self.encoding)
92
i = codecs.open(self.inputfile,'r',self.encoding)
94
raise RuntimeError('Failed to open [%s] input/output files'%(__name__))
96
# Progress header on stderr; dots are appended to it while lines are written.
sys.stderr.write('[%s] %s\n Please wait'%(__name__,self.outputfile))
99
# loop writes output line-by-line
102
# Element-wise variant: elements found in self.exception are written
# unchanged, all others are normalized individually to self.form.
# NOTE(review): open() stores cfg['exceptions'] as self.exceptions (plural),
# but this line reads self.exception -- confirm which name is intended.
o.write('%s\n'%(u''.join([ch if ch in self.exception else unicodedata.normalize(self.form,ch) for ch in '\b'.join(line).split('\b')])))
104
# Emit a progress dot whenever cnt is a multiple of 5000.
if not cnt%5000: sys.stderr.write('.')
107
# Whole-line variant: the line is converted with unicode() and normalized in one call.
o.write('%s\n'%(unicodedata.normalize(self.form,unicode(line))))
109
if not cnt%5000: sys.stderr.write('.')
110
# Terminate the progress line.
sys.stderr.write('\n')
111
# close input and output files
115
# If output was written somewhere other than self.outputfile, move it into place.
if not out == self.outputfile:
117
shutil.move(out,self.outputfile)
119
except KeyboardInterrupt:
121
# NOTE(review): raising a new KeyboardInterrupt() replaces the original
# traceback; a bare `raise` would preserve it.
raise KeyboardInterrupt()
123
# Record and log an error when the output file was not produced.
if not os.path.exists(self.outputfile):
124
self.errors.append([__name__,'missing','[%s] %s'%(__name__,self.outputfile)])
125
logger.error('%s\t%s',*self.errors[-1][1:])
128
'''Command prompt help.'''
129
return "\n%s\n\tUsage:\n\tfrom %s import filter\n"%(
130
os.path.basename(sys.argv[0]),
131
os.path.splitext(os.path.basename(sys.argv[0]))[0]
134
licensetxt=u'''CorpusFiltergraph™ v4.0
135
Copyright © 2010-2012 Precision Translation Tools Co., Ltd.
137
This program is free software: you can redistribute it and/or modify
138
it under the terms of the GNU Lesser General Public License as published by
139
the Free Software Foundation, either version 3 of the License, or
140
(at your option) any later version.
142
This program is distributed in the hope that it will be useful,
143
but WITHOUT ANY WARRANTY; without even the implied warranty of
144
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
145
GNU Lesser General Public License for more details.
147
You should have received a copy of the GNU Lesser General Public License
148
along with this program. If not, see http://www.gnu.org/licenses/.
150
For more information, please contact Precision Translation Tools Co., Ltd.
151
at: http://www.precisiontranslationtools.com'''
153
# Script entry point: when run directly, print the usage() help text to stdout.
if __name__ == "__main__":
156
# The text is UTF-8 encoded before the trailing newline is appended.
# NOTE(review): concatenating the encoded result with '\n' relies on
# Python 2 str semantics -- confirm before porting to Python 3.
sys.stdout.write(usage().encode('utf8')+'\n')