~jtv/corpusfiltergraph/cross-python

« back to all changes in this revision

Viewing changes to trunk/lib/corpusfg/plugins/writer-file.py

  • Committer: tahoar
  • Date: 2012-05-02 15:46:23 UTC
  • Revision ID: svn-v4:bc069b21-dff4-4e29-a776-06a4e04bad4e::266
new layout. need to update code to use the new layout

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#! /usr/bin/env python
 
2
# -*- coding: utf8 -*-
 
3
 
 
4
#===============================================================================
 
5
# Author: Tom Hoar
 
6
#===============================================================================
 
7
 
 
8
#version:
 
9
#4.0.264 - version update
 
10
 
 
11
import sys
 
12
import os
 
13
import shutil
 
14
import codecs
 
15
from fnmatch import fnmatch
 
16
from flock import flock
 
17
import logging
 
18
import common as cf
 
19
 
 
20
# Module logger. Its dotted name is built from the running script's base name
# (sys.argv[0] without directory or extension), the fixed parts 'manager' and
# 'filtergraph', and this module's __name__.
logger = logging.getLogger('.'.join([os.path.splitext(os.path.basename(sys.argv[0]))[0],'manager','filtergraph',__name__]))
 
21
 
 
22
class writer(object):
        '''Writer plugin that saves output buffers to temp files and moves them
        to their final output paths.

        Each entry of the fileobj mapping handled here is read through three
        keys: 'tempname' (path of the temp file), 'tempbuff' (sequence of text
        lines) and 'filelist' (path components joined with os.sep to form the
        output path).

        Methods report failure by returning -2 and success by returning None.
        '''

        # default configuration values
        cfg = {
                'append': False,
                'deletezerolength': True,
                'encoding': 'utf8',
                'eol': 'linux',
                'null': False,
                'rendertypes': '.*',
                'rootfolder': '',
                'roottype': '',
                'safemode': False,
                'stage' : None,
                'version': '4.0.264',
                }

        # class-level defaults; open() overrides them on the instance
        append = 'w'
        deletezerolength = True
        encoding = 'utf8'
        eol = '\x0a'
        null = False
        rendertypes = '.*'
        rootfolder = ''
        roottype = ''
        safemode = False
        stage = None
        # not read or written by any method of this class
        isopen = False
        # placeholder until open() binds the parent object
        p = object
        # NOTE(review): this list lives on the class, so open() appends to one
        # list shared by every instance - confirm that sharing is intended
        errors = []

        def open(self,parent,cfg):
                '''Configure the writer from cfg.

                parent -- object that receives the resolved roottype attribute
                cfg    -- mapping with the same keys as the class-level cfg

                A missing 'stage' or 'roottype' is recorded in self.errors and
                logged; it does not raise.
                '''
                # Bind the parent unless a caller already replaced the
                # placeholder. Without this, self.p is the builtin object type
                # and setting an attribute on it raises TypeError.
                if self.p is object:
                        self.p = parent

                if cfg['stage']:
                        self.stage = cfg['stage']
                else:
                        self.errors.append([__name__,'missing','[%s] \"stage=<missing>\"'%(__name__.split(',')[-1])])
                        logger.error('%s\t%s',*self.errors[-1][1:])

                if cfg['roottype']:
                        # only the first comma-separated entry is used
                        roottype = cfg['roottype'].strip(',').split(',')[0]
                        self.roottype = roottype
                        self.p.roottype = roottype
                else:
                        self.errors.append([__name__,'missing','[%s] \"roottype=<missing>\"'%(__name__.split(',')[-1])])
                        logger.error('%s\t%s',*self.errors[-1][1:])

                self.append = 'a' if cfg['append'] else 'w'
                self.deletezerolength = cfg['deletezerolength']

                # any spelling containing "utf8" (ignoring case and dashes) is normalized
                if 'utf8' in cfg['encoding'].lower().replace('-',''):
                        self.encoding = 'utf8'
                else:
                        self.encoding = cfg['encoding']

                # platform names map to a line terminator; anything else is used verbatim
                eolname = cfg['eol'].lower()
                if 'win' in eolname:
                        self.eol = '\x0d\x0a'
                elif 'linux' in eolname or 'unix' in eolname or 'posix' in eolname:
                        self.eol = '\x0a'
                else:
                        self.eol = cfg['eol']

                self.null = cfg['null']
                # guarantee exactly one leading extension separator
                self.rendertypes = os.extsep+cfg['rendertypes'].lstrip(os.extsep)
                self.rootfolder = self.normalizeroot(cfg['rootfolder'])
                self.safemode = cfg['safemode']

        def run(self,fileobj,k=None,mode=None):
                '''Save buffers to their temp files.

                fileobj -- mapping of key -> file entry; emptied entries are removed
                k       -- optional single key to check; all keys when omitted
                mode    -- optional file mode overriding the configured one

                Returns None on success (or when self.null is set), -2 otherwise.
                '''
                result = None

                # skip if self.null set
                if self.null:
                        return result

                # skip if k defined but not in fileobj
                if k and not k in fileobj: return -2

                # snapshot the keys: __removeempty deletes entries from fileobj
                allkeys = [k] if k else list(fileobj.keys())

                # remove empty output buffers
                for key in allkeys:
                        result = self.__removeempty(fileobj,key)
                        if result: return result

                # test if any removed buffers are 'tm' alignments and skip to avoid creating mis-aligned files
                for key in allkeys:
                        if not key in fileobj and len(key) > 3 and key[cf.kind] == 'tm':
                                result = -2

                # save when everything is ok
                if not result:
                        for key in list(fileobj.keys()):
                                # update file extension to rendertypes
                                basename = fileobj[key]['filelist'][cf.basename]
                                if not fnmatch(os.path.splitext(basename)[1],self.rendertypes):
                                        fileobj[key]['filelist'][cf.basename] = basename+self.rendertypes
                                result = self.__save(fileobj,key,mode) or result

                return result

        def flush(self,fileobj,k=None):
                '''Move temp output files to their output paths, then empty fileobj.

                fileobj -- mapping of key -> file entry; every entry is deleted
                           before returning
                k       -- optional single key to move; all keys when omitted

                Returns None on success (or when self.null is set), -2 otherwise.
                '''
                result = None
                if not self.null:
                        if k:
                                result = -2 if self.__mkfolder(fileobj,k) else self.__push(fileobj,k)
                        else:
                                # create destination folders (test write access)
                                for key in list(fileobj.keys()):
                                        result = self.__mkfolder(fileobj,key) or result
                                # move tempfile to output path
                                if not result:
                                        for key in list(fileobj.keys()):
                                                result = self.__push(fileobj,key) or result
                # iterate a snapshot: deleting while iterating a live key view fails
                for key in list(fileobj.keys()):
                        del(fileobj[key])
                return result

        def close(self):
                '''Nothing to release.'''
                return

        def getroot(self,rootfolder):
                '''Split rootfolder into a file-system prefix and a folder.

                Returns [fss, folder] as text. fss is 'X:' for a drive letter,
                'host:' for a host prefix, the leading '\\\\name' of a UNC path,
                or '' otherwise. folder has leading slashes and backslashes removed.
                '''
                try:
                        text = unicode
                except NameError:
                        # interpreters without a separate unicode type
                        text = str

                folder = os.path.abspath(os.path.expanduser(rootfolder.rstrip().replace('/',os.sep).replace('\\',os.sep)))
                if fnmatch(folder,'?:*') or fnmatch(folder,'*:*'):
                        # DOS drive letter : folder, or ssh hostname : folder
                        # split on the first colon only, so later colons stay in folder
                        fss, folder = folder.split(':',1)
                        fss = fss + ':'
                elif fnmatch(folder,'\\\\*\\*'):
                        # MS Windows UNC \ folder
                        fss = '\\\\'+folder.lstrip(os.sep).split(os.sep)[0]
                        # drop the prefix only, not later repeats of the same text
                        folder = folder.replace(fss,'',1)
                else:
                        # NFS mount / folder
                        fss = ''
                        folder = os.path.normpath(folder)
                        if folder == '.': folder = '*'
                folder = folder.lstrip('\\/')
                return [text(fss), text(folder)]

        def normalizeroot(self,rootfolder):
                '''Return rootfolder with both slash styles replaced by os.sep.'''
                return rootfolder.replace('\\',os.sep).replace('/',os.sep)

        def __save(self,fileobj,k,mode):
                '''Write the 'tempbuff' lines of fileobj[k] to its 'tempname' file.

                Lines are joined with self.eol and followed by one trailing
                self.eol. mode falls back to self.append when not given.
                Returns -2 on any failure, None on success.
                '''
                mode = mode or self.append
                tempname = None
                try:
                        tempname = fileobj[k]['tempname']
                        # the with-block closes the file even when write() fails
                        with codecs.open(tempname,mode,self.encoding) as f:
                                f.write('%s%s'%(self.eol.join(fileobj[k]['tempbuff']),self.eol))
                except Exception as e:
                        # report the path taken from fileobj, the mapping actually used above
                        logger.exception('%s\t%s - %s','failed',tempname,str(e))
                        return -2
                logger.debug('%s\t%s','file',tempname)

        def __removeempty(self,fileobj,k):
                '''Drop fileobj[k] when it must not be written.

                With safemode set and the output path already present, the entry
                is removed and -2 is returned. When the buffer holds only
                whitespace, the entry is removed (and an existing output file is
                deleted if deletezerolength is set) and None is returned.
                Otherwise the entry is kept and None is returned.
                '''
                import errno

                outpath = os.sep.join(fileobj[k]['filelist'])
                if os.path.exists(outpath) and self.safemode:
                        logger.warning('%s\t%s - %s','skip','output already exists',outpath)
                        del(fileobj[k])
                        return -2
                elif not sum([len(line.strip()) for line in fileobj[k]['tempbuff']]):
                        stagename = fileobj[k]['filelist'][cf.stage]
                        if not 'workbench' in stagename and not 'extract' in stagename:
                                logger.warning('%s\t%s - %s','skip','empty data',fileobj[k]['tempname'])
                        if os.path.exists(outpath) and self.deletezerolength:
                                os.unlink(outpath)
                                try:
                                        os.removedirs(os.path.dirname(outpath))
                                except OSError as e:
                                        # a folder that still holds other files is expected
                                        if not e.errno == errno.ENOTEMPTY:
                                                logger.exception('%s\t%s, %s, %s','failed',e.errno,e.strerror,outpath)
                        del(fileobj[k])

        def __mkfolder(self,fileobj,k):
                '''Create the output folder for fileobj[k] if its temp file exists.

                Returns -2 when the folder cannot be created, None otherwise.
                '''
                import errno

                if os.path.exists(fileobj[k]['tempname']):
                        try:
                                os.makedirs(os.path.dirname(os.sep.join(fileobj[k]['filelist'])))
                        except OSError as e:
                                # an already existing folder is fine
                                if not e.errno == errno.EEXIST:
                                        logger.exception('%s\t%s, %s, %s','failed',e.errno,e.strerror,e.filename)
                                        return -2

        def __push(self,fileobj,k):
                '''Move the temp file of fileobj[k] to its output path.

                Returns None when k is absent, the temp file does not exist, or
                the move succeeds; returns -2 when the move fails.
                '''
                if not k in fileobj: return
                if os.path.exists(fileobj[k]['tempname']):
                        try:
                                shutil.move(fileobj[k]['tempname'],os.sep.join(fileobj[k]['filelist']))
                        except Exception as e:
                                # log the cause instead of swallowing it silently
                                logger.exception('%s\t%s - %s','failed',fileobj[k]['tempname'],str(e))
                                return -2
                        else:
                                logger.debug('%s\t%s','file',os.sep.join(fileobj[k]['filelist']))
 
199
 
 
200
def usage():
        '''Command prompt help.

        Returns a usage string built from the running script's file name
        (sys.argv[0]) and that name without its extension.
        '''
        script = os.path.basename(sys.argv[0])
        module = os.path.splitext(script)[0]
        # this module defines the writer class, so the hint names "writer"
        return "\n%s\n\tUsage:\n\tfrom %s import writer\n"%(script,module)
 
206
 
 
207
licensetxt=u'''CorpusFiltergraph™ v4.0
 
208
Copyright © 2010-2012 Precision Translation Tools Co., Ltd.
 
209
 
 
210
This program is free software: you can redistribute it and/or modify
 
211
it under the terms of the GNU Lesser General Public License as published by
 
212
the Free Software Foundation, either version 3 of the License, or
 
213
(at your option) any later version.
 
214
 
 
215
This program is distributed in the hope that it will be useful,
 
216
but WITHOUT ANY WARRANTY; without even the implied warranty of
 
217
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
218
GNU Lesser General Public License for more details.
 
219
 
 
220
You should have received a copy of the GNU Lesser General Public License
 
221
along with this program.  If not, see http://www.gnu.org/licenses/.
 
222
 
 
223
For more information, please contact Precision Translation Tools Co., Ltd.
 
224
at: http://www.precisiontranslationtools.com'''
 
225
 
 
226
if __name__ == "__main__":
        # run as a script: print the usage help, encoded as utf8
        import os
        import sys
        helptext = usage().encode('utf8')
        sys.stdout.write(helptext+'\n')