4
#===============================================================================
6
#===============================================================================
9
#4.0.264 - version update
15
from fnmatch import fnmatch
16
from flock import flock
20
logger = logging.getLogger('.'.join([os.path.splitext(os.path.basename(sys.argv[0]))[0],'manager','filtergraph',__name__]))
26
'deletezerolength': True,
29
'reversecolumns': False,
30
'rendertypes': '.tab',
38
deletezerolength = True
41
reversecolumns = False
51
def open(self,parent,cfg):
# Copies the filter's settings from the cfg mapping onto the instance.
# `parent` is not referenced in the visible statements.
# NOTE(review): the bare integers between statements look like line-number
# residue from an extraction step; the gaps in their sequence suggest that the
# guards around the two "missing" error branches below are not shown here.
53
self.stage = cfg['stage']
55
# Error branch for an absent 'stage' setting: the entry is recorded on
# self.errors and its last two fields are logged.
# NOTE(review): __name__.split(',') splits on a comma, so for a dotted module
# name it yields the whole name; '.' may have been intended - confirm.
self.errors.append([__name__,'missing','[%s] \"stage=<missing>\"'%(__name__.split(',')[-1])])
56
logger.error('%s\t%s',*self.errors[-1][1:])
58
# Only the first comma-separated entry of 'roottype' is kept (leading and
# trailing commas are stripped first).  The same value is mirrored onto self.p.
# NOTE(review): self.p is not assigned in the visible lines - presumably the
# parent object; verify.
self.roottype = cfg['roottype'].strip(',').split(',')[0]
59
self.p.roottype = cfg['roottype'].strip(',').split(',')[0]
61
# Error branch for an absent 'roottype' setting (same shape as for 'stage').
self.errors.append([__name__,'missing','[%s] \"roottype=<missing>\"'%(__name__.split(',')[-1])])
62
logger.error('%s\t%s',*self.errors[-1][1:])
63
# self.append holds a file-open mode: 'a' when cfg['append'] is truthy, else 'w'.
self.append = 'a' if cfg['append'] else 'w'
64
self.deletezerolength = cfg['deletezerolength']
65
# Any spelling that contains 'utf8' once lower-cased and stripped of '-'
# (e.g. 'UTF-8') is stored as 'utf8'; other encodings are kept verbatim.
self.encoding = 'utf8' if 'utf8' in cfg['encoding'].lower().replace('-','') else cfg['encoding']
66
# 'win' selects CR LF; 'linux', 'unix' or 'posix' select LF; any other value
# is used literally as the line terminator.
self.eol = '\x0d\x0a' if 'win' in cfg['eol'].lower() else '\x0a' if 'linux' in cfg['eol'].lower() or 'unix' in cfg['eol'].lower() or 'posix' in cfg['eol'].lower() else cfg['eol']
67
self.reversecolumns = cfg['reversecolumns']
68
# Stored with exactly one leading os.extsep, whether or not cfg supplied one.
self.rendertypes = os.extsep+cfg['rendertypes'].lstrip(os.extsep)
69
# Path separators in the root folder are converted to os.sep.
self.rootfolder = self.normalizeroot(cfg['rootfolder'])
70
self.safemode = cfg['safemode']
72
def run(self,fileobj,k=None,mode=None):
73
'''saves buffer to file'''
# Drops empty buffers, merges each buffer with the source-language buffer into
# tab-separated rows, then writes the result through self.__save.
# NOTE(review): the bare integers between statements look like line-number
# residue from an extraction step; gaps in their sequence mean that the loop
# binding `key` and the assignment of `ksrc` are not visible here.
76
# skip if k defined but not in fileobj
77
if k and not k in fileobj: return -2
79
# Process only the requested key, or every key when none was given.
keys = [k] if k else fileobj.keys()
83
# remove empty output buffers
85
# A truthy status from __removeempty is returned to the caller as-is.
result = self.__removeempty(fileobj,key)
86
if result: return result
88
# test if any removed buffers are 'tm' alignments and skip to avoid creating mis-aligned files
90
# True when key is no longer in fileobj, has more than 3 elements and its
# cf.kind element equals 'tm'; the statement this guards is not visible.
if not key in fileobj.keys() and len(key) > 3 and key[cf.kind] == 'tm':
92
if result: return result
94
# rename output files and find srckey
97
# NOTE(review): this loop rebinds the parameter `k`.
for k in fileobj.keys():
98
# Append self.rendertypes to the base name unless its extension already
# matches.  NOTE(review): these two lines index fileobj[key], not fileobj[k],
# inside a loop over k - confirm which was intended.
if not fnmatch(os.path.splitext(fileobj[key]['filelist'][cf.basename])[1],self.rendertypes):
99
fileobj[key]['filelist'][cf.basename] = fileobj[key]['filelist'][cf.basename]+self.rendertypes
100
# A key whose cf.rdrlang and cf.srclang elements are equal - presumably the
# one remembered as ksrc; the assignment itself is not visible.
if k[cf.rdrlang] == k[cf.srclang]:
104
if not ksrc: return -2
108
# Rows are paired by index over the length of the ksrc buffer and joined with
# a tab; rows where both cells are falsy are dropped.  With reversecolumns the
# k text is the first column, otherwise the ksrc text is.
# NOTE(review): indexing assumes fileobj[k]['tempbuff'] is at least as long as
# the ksrc buffer (IndexError otherwise) - confirm the caller guarantees this.
if self.reversecolumns:
109
fileobj[k]['tempbuff'] = ['\t'.join([fileobj[k]['tempbuff'][linenum],fileobj[ksrc]['tempbuff'][linenum]]) for linenum in range(len(fileobj[ksrc]['tempbuff'])) if fileobj[ksrc]['tempbuff'][linenum] or fileobj[k]['tempbuff'][linenum]]
111
fileobj[k]['tempbuff'] = ['\t'.join([fileobj[ksrc]['tempbuff'][linenum],fileobj[k]['tempbuff'][linenum]]) for linenum in range(len(fileobj[ksrc]['tempbuff'])) if fileobj[ksrc]['tempbuff'][linenum] or fileobj[k]['tempbuff'][linenum]]
112
# dump ksrc buffer to prevent saving file
# (every line becomes '', keeping the line count unchanged)
113
fileobj[ksrc]['tempbuff'] = ['' for line in fileobj[ksrc]['tempbuff']]
115
# Keep a truthy status from __save, otherwise retain the earlier result.
result = self.__save(fileobj,key,mode) or result
119
def flush(self,fileobj,k=None):
120
'''move temp output files to server'''
# Creates the destination folders and then moves the temp files into place,
# either for the single key k or for every key in fileobj.
# NOTE(review): the bare integers between statements look like line-number
# residue from an extraction step; the branch that chooses between the
# single-key and all-keys paths, and the body of the final loop, are not
# visible here.
124
# Single-key path: -2 when __mkfolder reports a truthy status, otherwise
# whatever __push returns.
result = -2 if self.__mkfolder(fileobj,k) else self.__push(fileobj,k)
126
# create destination folders (test write access)
127
for k in fileobj.keys():
128
# `x or result` keeps a truthy status from this call, else the earlier value.
result = self.__mkfolder(fileobj,k) or result
129
# move tempfile to output path
131
for k in fileobj.keys():
132
result = self.__push(fileobj,k) or result
133
for k in fileobj.keys():
140
def getroot(self,rootfolder):
141
'''Splits rootfolder to file system and rootfolder'''
# Returns a two-item list [fss, folder]: the file-system part (drive, host or
# UNC server) and the remaining folder path.
# NOTE(review): the bare integers between statements look like line-number
# residue from an extraction step; the branch taken when none of the three
# patterns matches (which must bind fss) is not visible here.
142
# Trailing whitespace is removed, both slash styles become os.sep, '~' is
# expanded and the result is made absolute.
# NOTE(review): abspath runs before the pattern checks, so on POSIX a relative
# 'host:dir' value gets the current directory prepended - confirm intended.
folder = os.path.abspath(os.path.expanduser(rootfolder.rstrip().replace('/',os.sep).replace('\\',os.sep)))
143
if fnmatch(folder,'?:*'):
144
# DOS drive letter : folder
# NOTE(review): the two-name unpacking raises ValueError if the path contains
# more than one ':'; split(':', 1) would tolerate that - confirm.
145
fss, folder = folder.split(':')
147
elif fnmatch(folder,'*:*'):
148
# ssh hostname : folder
149
fss, folder = folder.split(':')
151
elif fnmatch(folder,'\\\\*\\*'):
152
# MS Windows UNC \ folder
# fss becomes two backslashes plus the first path component (the server name).
153
fss = '\\\\'+folder.lstrip(os.sep).split(os.sep)[0]
154
# str.replace removes every occurrence of fss, not only the leading one.
folder = folder.replace(fss,'')
158
folder = os.path.normpath(folder)
159
# normpath('') yields '.', so an empty remainder is reported as '*'.
if folder == '.': folder = '*'
160
# Leading slashes of either style are removed from the folder part.
folder = folder.lstrip('\\/')
161
# NOTE(review): `unicode` is a Python 2 builtin and is undefined on Python 3.
return [unicode(fss), unicode(folder)]
163
def normalizeroot(self,rootfolder):
    '''Converts every path separator in rootfolder to the platform's os.sep.

    Both backslashes and forward slashes are rewritten, so a root given in
    either Windows or POSIX notation comes back in native notation.  Nothing
    else is normalised: the path is not made absolute, '..' is not collapsed
    and repeated or trailing separators are kept.

    rootfolder -- path string as written in the configuration
    returns    -- the same string with '\\' and '/' replaced by os.sep
    '''
    # Backslash first, then forward slash, matching the original call order.
    for separator in ('\\', '/'):
        rootfolder = rootfolder.replace(separator, os.sep)
    return rootfolder
166
def __save(self,fileobj,k,mode):
# Writes the buffered lines of fileobj[k] to its temp file.
# NOTE(review): the bare integers between statements look like line-number
# residue from an extraction step; the try/except around the write, the close
# of the file handle and the return statements are not visible here.
167
# Fall back to the configured mode (self.append) when the caller gave none.
mode = self.append if not mode else mode
169
f = codecs.open(fileobj[k]['tempname'],mode,self.encoding)
170
# Lines are joined with self.eol and one more self.eol terminates the output.
f.write('%s%s'%(self.eol.join(fileobj[k]['tempbuff']),self.eol))
173
# Failure log.  NOTE(review): the path is read from self.p.cfinput[k] here but
# from fileobj[k] everywhere else in this method - confirm they are the same.
logger.exception('%s\t%s - %s',*['failed',self.p.cfinput[k]['tempname'],str(e)])
175
logger.debug('%s\t%s',*['file',fileobj[k]['tempname']])
177
def __removeempty(self,fileobj,k):
# Decides whether the buffer for key k should be skipped: either because its
# destination already exists in safe mode, or because the buffer is empty.
# The destination path is os.sep.join(fileobj[k]['filelist']).
# NOTE(review): the bare integers between statements look like line-number
# residue from an extraction step; the return statements and the try/except
# around the deletions are not visible here.
178
# Safe mode: an existing destination is never overwritten.
if os.path.exists(os.sep.join(fileobj[k]['filelist'])) and self.safemode:
179
logger.warn('%s\t%s - %s',*['skip','output already exists',os.sep.join(fileobj[k]['filelist']),])
182
# Empty means every line is blank once stripped of whitespace.
elif not sum([len(line.strip()) for line in fileobj[k]['tempbuff']]):
183
# The warning is suppressed when the cf.stage element of filelist contains
# 'workbench' or 'extract'.
if not 'workbench' in fileobj[k]['filelist'][cf.stage] and not 'extract' in fileobj[k]['filelist'][cf.stage]:
184
logger.warn('%s\t%s - %s',*['skip','empty data',fileobj[k]['tempname']])
185
# With deletezerolength set, a destination left over from an earlier run is
# deleted.
if os.path.exists(os.sep.join(fileobj[k]['filelist'])) and self.deletezerolength:
186
os.unlink(os.sep.join(fileobj[k]['filelist']))
188
# removedirs deletes the directory and then each parent that becomes empty.
os.removedirs(os.path.dirname(os.sep.join(fileobj[k]['filelist'])))
190
# NOTE(review): 39 is presumably ENOTEMPTY (its Linux value); the number
# differs on other platforms, so errno.ENOTEMPTY would be portable - confirm.
if not e.errno == 39:
191
logger.exception('%s\t%s, %s, %s',*['failed',e.errno,e.strerror,os.sep.join(fileobj[k]['filelist']),])
194
def __mkfolder(self,fileobj,k):
# Creates the destination directory for key k when its temp file exists.
# NOTE(review): the bare integers between statements look like line-number
# residue from an extraction step; the try/except around makedirs and the
# return statements are not visible here.
195
if os.path.exists(fileobj[k]['tempname']):
197
# The directory is the parent of the path os.sep.join(fileobj[k]['filelist']).
os.makedirs(os.path.dirname(os.sep.join(fileobj[k]['filelist'])))
199
# NOTE(review): 17 is presumably EEXIST (directory already there), which is
# not logged; errno.EEXIST would state that explicitly - confirm.
if not e.errno == 17:
200
logger.exception('%s\t%s, %s, %s',*['failed',e.errno,e.strerror,e.filename,])
203
def __push(self,fileobj,k):
# Moves the temp file for key k to its destination path,
# os.sep.join(fileobj[k]['filelist']).
# NOTE(review): the bare integers between statements look like line-number
# residue from an extraction step; the try/except around the move and the
# final return are not visible here.
204
# Unknown key: nothing to do (returns None).
if not k in fileobj: return
205
# Append self.rendertypes to the base name unless its extension already matches.
if not fnmatch(os.path.splitext(fileobj[k]['filelist'][cf.basename])[1],self.rendertypes):
206
fileobj[k]['filelist'][cf.basename] = fileobj[k]['filelist'][cf.basename]+self.rendertypes
207
# Only keys whose temp file was actually written are moved.
if os.path.exists(fileobj[k]['tempname']):
209
shutil.move(fileobj[k]['tempname'],os.sep.join(fileobj[k]['filelist']))
213
logger.debug('%s\t%s',*['file',os.sep.join(fileobj[k]['filelist'])])
216
'''Command prompt help.'''
217
return "\n%s\n\tUsage:\n\tfrom %s import reader\n"%(
218
os.path.basename(sys.argv[0]),
219
os.path.splitext(os.path.basename(sys.argv[0]))[0]
222
licensetxt=u'''CorpusFiltergraph™ v4.0
223
Copyright © 2010-2012 Precision Translation Tools Co., Ltd.
225
This program is free software: you can redistribute it and/or modify
226
it under the terms of the GNU Lesser General Public License as published by
227
the Free Software Foundation, either version 3 of the License, or
228
(at your option) any later version.
230
This program is distributed in the hope that it will be useful,
231
but WITHOUT ANY WARRANTY; without even the implied warranty of
232
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
233
GNU Lesser General Public License for more details.
235
You should have received a copy of the GNU Lesser General Public License
236
along with this program. If not, see http://www.gnu.org/licenses/.
238
For more information, please contact Precision Translation Tools Co., Ltd.
239
at: http://www.precisiontranslationtools.com'''
241
if __name__ == "__main__":
244
sys.stdout.write(usage().encode('utf8')+'\n')