~jtv/corpusfiltergraph/cross-python

« back to all changes in this revision

Viewing changes to trunk/lib/corpusfg/plugins/writer-tab.py

  • Committer: tahoar
  • Date: 2012-05-02 15:46:23 UTC
  • Revision ID: svn-v4:bc069b21-dff4-4e29-a776-06a4e04bad4e::266
new layout. need to update code to use the new layout

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#! /usr/bin/env python
 
2
# -*- coding: utf8 -*-
 
3
 
 
4
#===============================================================================
 
5
# Author: Tom Hoar
 
6
#===============================================================================
 
7
 
 
8
#version:
 
9
#4.0.264 - version update
 
10
 
 
11
import sys
 
12
import os
 
13
import shutil
 
14
import codecs
 
15
from fnmatch import fnmatch
 
16
from flock import flock
 
17
import logging
 
18
import common as cf
 
19
 
 
20
# Module logger named '<script>.manager.filtergraph.<module>', where <script>
# is the basename of sys.argv[0] without its extension.
logger = logging.getLogger('.'.join([os.path.splitext(os.path.basename(sys.argv[0]))[0],'manager','filtergraph',__name__]))
 
21
 
 
22
class writer(object):
        '''Writer plugin that saves source/target buffers as tab-separated files.'''

        # Default plugin configuration. Presumably the framework merges user
        # settings into a dict of this shape before calling open() -- confirm
        # against the manager.
        cfg = {
                'append': False,
                'deletezerolength': True,
                'encoding': 'utf8',
                'eol': 'linux',
                'reversecolumns': False,
                'rendertypes': '.tab',
                'rootfolder': '',
                'roottype': '',
                'safemode': False,
                'stage' : None,
                'version': '4.0.264',
                }

        # Runtime settings; open() overwrites most of these from its cfg argument.
        append = 'w'                # file mode used by __save when none is given
        deletezerolength = True
        encoding = 'utf8'
        eol = '\x0a'
        reversecolumns = False
        rendertypes = '.*'
        rootfolder = ''
        roottype = ''
        safemode = False
        stage = None
        # flush() skips moving files when this is true; the default keeps
        # flush() usable when nothing else sets the flag.
        null = False
        isopen = False
        p = object
        # NOTE(review): class-level list, so it is shared by every instance
        # that does not rebind it.
        errors = []
 
50
 
 
51
        def open(self,parent,cfg):
 
52
                if cfg['stage']:
 
53
                        self.stage = cfg['stage']
 
54
                else:
 
55
                        self.errors.append([__name__,'missing','[%s] \"stage=<missing>\"'%(__name__.split(',')[-1])])
 
56
                        logger.error('%s\t%s',*self.errors[-1][1:])
 
57
                if cfg['roottype']:
 
58
                        self.roottype = cfg['roottype'].strip(',').split(',')[0]
 
59
                        self.p.roottype = cfg['roottype'].strip(',').split(',')[0]
 
60
                else:
 
61
                        self.errors.append([__name__,'missing','[%s] \"roottype=<missing>\"'%(__name__.split(',')[-1])])
 
62
                        logger.error('%s\t%s',*self.errors[-1][1:])
 
63
                self.append = 'a' if cfg['append'] else 'w'
 
64
                self.deletezerolength = cfg['deletezerolength']
 
65
                self.encoding = 'utf8' if 'utf8' in cfg['encoding'].lower().replace('-','') else cfg['encoding']
 
66
                self.eol = '\x0d\x0a' if 'win' in cfg['eol'].lower() else '\x0a' if 'linux' in cfg['eol'].lower() or 'unix' in cfg['eol'].lower() or 'posix' in cfg['eol'].lower() else cfg['eol']
 
67
                self.reversecolumns = cfg['reversecolumns']
 
68
                self.rendertypes = os.extsep+cfg['rendertypes'].lstrip(os.extsep)
 
69
                self.rootfolder = self.normalizeroot(cfg['rootfolder'])
 
70
                self.safemode = cfg['safemode']
 
71
 
 
72
        def run(self,fileobj,k=None,mode=None):
 
73
                '''saves buffer to file'''
 
74
                # find srclang key
 
75
 
 
76
                # skip if k defined but not in fileobj
 
77
                if k and not k in fileobj: return -2
 
78
 
 
79
                keys = [k] if k else fileobj.keys()
 
80
 
 
81
                allkeys = list(keys)
 
82
 
 
83
                # remove empty output buffers
 
84
                for key in keys:
 
85
                        result = self.__removeempty(fileobj,key)
 
86
                        if result: return result
 
87
 
 
88
                # test if any removed buffers are 'tm' alignments and skip to avoid creating mis-aligned files
 
89
                for key in allkeys:
 
90
                        if not key in fileobj.keys() and len(key) > 3 and key[cf.kind] == 'tm':
 
91
                                result = -2
 
92
                if result: return result
 
93
 
 
94
                # rename output files and find srckey
 
95
                ktgt = []
 
96
                ksrc = None
 
97
                for k in fileobj.keys():
 
98
                        if not fnmatch(os.path.splitext(fileobj[key]['filelist'][cf.basename])[1],self.rendertypes):
 
99
                                fileobj[key]['filelist'][cf.basename] = fileobj[key]['filelist'][cf.basename]+self.rendertypes
 
100
                        if k[cf.rdrlang] == k[cf.srclang]:
 
101
                                ksrc = k
 
102
                        else:
 
103
                                ktgt.append(k)
 
104
                if not ksrc: return -2
 
105
 
 
106
                # shuffle buffers
 
107
                for k in ktgt:
 
108
                        if self.reversecolumns:
 
109
                                fileobj[k]['tempbuff'] = ['\t'.join([fileobj[k]['tempbuff'][linenum],fileobj[ksrc]['tempbuff'][linenum]]) for linenum in range(len(fileobj[ksrc]['tempbuff'])) if fileobj[ksrc]['tempbuff'][linenum] or fileobj[k]['tempbuff'][linenum]]
 
110
                        else:
 
111
                                fileobj[k]['tempbuff'] = ['\t'.join([fileobj[ksrc]['tempbuff'][linenum],fileobj[k]['tempbuff'][linenum]]) for linenum in range(len(fileobj[ksrc]['tempbuff'])) if fileobj[ksrc]['tempbuff'][linenum] or fileobj[k]['tempbuff'][linenum]]
 
112
                # dump ksrc buffer to prevent saving file
 
113
                fileobj[ksrc]['tempbuff'] = ['' for line in fileobj[ksrc]['tempbuff']]
 
114
 
 
115
                result = self.__save(fileobj,key,mode) or result
 
116
 
 
117
                return result
 
118
 
 
119
        def flush(self,fileobj,k=None):
 
120
                '''move temp output files to server'''
 
121
                result = None
 
122
                if not self.null:
 
123
                        if k:
 
124
                                result = -2 if self.__mkfolder(fileobj,k) else self.__push(fileobj,k)
 
125
                        else:
 
126
                                # create destination folders (test write access)
 
127
                                for k in fileobj.keys():
 
128
                                        result = self.__mkfolder(fileobj,k) or result
 
129
                                # move tempfile to output path
 
130
                                if not result:
 
131
                                        for k in fileobj.keys():
 
132
                                                result = self.__push(fileobj,k) or result
 
133
                for k in fileobj.keys():
 
134
                        del(fileobj[k])
 
135
                return result
 
136
 
 
137
        def close(self):
 
138
                return
 
139
 
 
140
        def getroot(self,rootfolder):
 
141
                '''Splits rootfolder to file system and rootfolder'''
 
142
                folder = os.path.abspath(os.path.expanduser(rootfolder.rstrip().replace('/',os.sep).replace('\\',os.sep)))
 
143
                if fnmatch(folder,'?:*'):
 
144
                        # DOS drive letter : folder
 
145
                        fss, folder = folder.split(':')
 
146
                        fss = fss + ':'
 
147
                elif fnmatch(folder,'*:*'):
 
148
                        # ssh hostname : folder
 
149
                        fss, folder = folder.split(':')
 
150
                        fss = fss + ':'
 
151
                elif fnmatch(folder,'\\\\*\\*'):
 
152
                        # MS Windows UNC \ folder
 
153
                        fss = '\\\\'+folder.lstrip(os.sep).split(os.sep)[0]
 
154
                        folder = folder.replace(fss,'')
 
155
                else:
 
156
                        # NFS mount / folder
 
157
                        fss = ''
 
158
                        folder = os.path.normpath(folder)
 
159
                        if folder == '.': folder = '*'
 
160
                folder = folder.lstrip('\\/')
 
161
                return [unicode(fss), unicode(folder)]
 
162
 
 
163
        def normalizeroot(self,rootfolder):
 
164
                return rootfolder.replace('\\',os.sep).replace('/',os.sep)
 
165
 
 
166
        def __save(self,fileobj,k,mode):
 
167
                mode = self.append if not mode else mode
 
168
                try:
 
169
                        f = codecs.open(fileobj[k]['tempname'],mode,self.encoding)
 
170
                        f.write('%s%s'%(self.eol.join(fileobj[k]['tempbuff']),self.eol))
 
171
                        f.close()
 
172
                except Exception,e:
 
173
                        logger.exception('%s\t%s - %s',*['failed',self.p.cfinput[k]['tempname'],str(e)])
 
174
                        return -2
 
175
                logger.debug('%s\t%s',*['file',fileobj[k]['tempname']])
 
176
 
 
177
        def __removeempty(self,fileobj,k):
 
178
                if os.path.exists(os.sep.join(fileobj[k]['filelist'])) and self.safemode:
 
179
                        logger.warn('%s\t%s - %s',*['skip','output already exists',os.sep.join(fileobj[k]['filelist']),])
 
180
                        del(fileobj[k])
 
181
                        return -2
 
182
                elif not sum([len(line.strip()) for line in fileobj[k]['tempbuff']]):
 
183
                        if not 'workbench' in fileobj[k]['filelist'][cf.stage] and not 'extract' in fileobj[k]['filelist'][cf.stage]:
 
184
                                logger.warn('%s\t%s - %s',*['skip','empty data',fileobj[k]['tempname']])
 
185
                        if os.path.exists(os.sep.join(fileobj[k]['filelist'])) and self.deletezerolength:
 
186
                                os.unlink(os.sep.join(fileobj[k]['filelist']))
 
187
                                try:
 
188
                                        os.removedirs(os.path.dirname(os.sep.join(fileobj[k]['filelist'])))
 
189
                                except OSError,e:
 
190
                                        if not e.errno == 39:
 
191
                                                logger.exception('%s\t%s, %s, %s',*['failed',e.errno,e.strerror,os.sep.join(fileobj[k]['filelist']),])
 
192
                        del(fileobj[k])
 
193
 
 
194
        def __mkfolder(self,fileobj,k):
 
195
                if os.path.exists(fileobj[k]['tempname']):
 
196
                        try:
 
197
                                os.makedirs(os.path.dirname(os.sep.join(fileobj[k]['filelist'])))
 
198
                        except OSError,e:
 
199
                                if not e.errno == 17:
 
200
                                        logger.exception('%s\t%s, %s, %s',*['failed',e.errno,e.strerror,e.filename,])
 
201
                                        return -2
 
202
 
 
203
        def __push(self,fileobj,k):
 
204
                if not k in fileobj: return
 
205
                if not fnmatch(os.path.splitext(fileobj[k]['filelist'][cf.basename])[1],self.rendertypes):
 
206
                        fileobj[k]['filelist'][cf.basename] = fileobj[k]['filelist'][cf.basename]+self.rendertypes
 
207
                if os.path.exists(fileobj[k]['tempname']):
 
208
                        try:
 
209
                                shutil.move(fileobj[k]['tempname'],os.sep.join(fileobj[k]['filelist']))
 
210
                        except:
 
211
                                return -2
 
212
                        else:
 
213
                                logger.debug('%s\t%s',*['file',os.sep.join(fileobj[k]['filelist'])])
 
214
 
 
215
def usage():
        '''Command prompt help.

        Returns a short usage text built from the basename of sys.argv[0]; the
        import line names the writer class this module defines.
        '''
        script = os.path.basename(sys.argv[0])
        module = os.path.splitext(script)[0]
        return "\n%s\n\tUsage:\n\tfrom %s import writer\n"%(script,module)
 
221
 
 
222
licensetxt=u'''CorpusFiltergraph™ v4.0
 
223
Copyright © 2010-2012 Precision Translation Tools Co., Ltd.
 
224
 
 
225
This program is free software: you can redistribute it and/or modify
 
226
it under the terms of the GNU Lesser General Public License as published by
 
227
the Free Software Foundation, either version 3 of the License, or
 
228
(at your option) any later version.
 
229
 
 
230
This program is distributed in the hope that it will be useful,
 
231
but WITHOUT ANY WARRANTY; without even the implied warranty of
 
232
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
233
GNU Lesser General Public License for more details.
 
234
 
 
235
You should have received a copy of the GNU Lesser General Public License
 
236
along with this program.  If not, see http://www.gnu.org/licenses/.
 
237
 
 
238
For more information, please contact Precision Translation Tools Co., Ltd.
 
239
at: http://www.precisiontranslationtools.com'''
 
240
 
 
241
if __name__ == "__main__":
        # usage() returns a native str, so it is written as-is. Calling
        # .encode('utf8') on it makes Python 2 decode it as ASCII first
        # (failing on a non-ASCII script path) and yields bytes on Python 3,
        # which cannot be concatenated with '\n'.
        sys.stdout.write(usage()+'\n')