~jtv/corpusfiltergraph/cross-python

« back to all changes in this revision

Viewing changes to trunk/lib/corpusfg/graphs/sa-champollion/en/english_stemmer.py

  • Committer: tahoar
  • Date: 2012-05-02 15:46:23 UTC
  • Revision ID: svn-v4:bc069b21-dff4-4e29-a776-06a4e04bad4e::266
New layout. The code still needs to be updated to use the new layout.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#! /usr/bin/env python
 
2
# -*- coding: utf8 -*-
 
3
 
 
4
#===============================================================================
 
5
# Author: Tom Hoar
 
6
#===============================================================================
 
7
 
 
8
#version:
 
9
#4.0.264 - version update
 
10
 
 
11
import os
 
12
import sys
 
13
import subprocess
 
14
import logging
 
15
import common as cf
 
16
 
 
17
# Module logger. Its name has the form
# "<stem of sys.argv[0]>.manager.filtergraph.<this module's __name__>".
logger = logging.getLogger(
    '%s.manager.filtergraph.%s' % (
        os.path.splitext(os.path.basename(sys.argv[0]))[0],
        __name__,
    )
)
 
18
 
 
19
class filter(object):
    '''Filter that pipes buffered text lines through an external script.

    open() starts a child process (by default a Perl interpreter running
    english_stemmer.pl) and run() feeds it one line at a time, replacing
    each buffered line with the single line the child writes back.

    The parent object handed to open() must provide:
      * decodepath(coded, flag) -- used to resolve the script, the data
        file argument and the executable; a falsy result is treated as
        "missing".  NOTE(review): the exact meaning of the 'N,name'
        coded strings and of the boolean flag is defined by the parent,
        not here -- confirm against the manager.
      * cfoutput[k]['tempbuff'] -- the list of text lines run() rewrites
        in place.
    '''

    # Default configuration for this filter.
    #   'args'       : dict of positional arguments keyed by integer
    #                  position; open() overwrites key 0 with the resolved
    #                  script path and resolves key 2 through decodepath().
    #   'args1/2'    : not read by this class; presumably merged into
    #                  'args' by the caller -- TODO confirm.
    #   'exeoptions' : inserted in front of the argument list when an
    #                  executable is configured.
    #   'kw'         : keyword arguments forwarded to subprocess.Popen;
    #                  the string 'PIPE' stands for subprocess.PIPE.
    cfg = {
        'args': {},
        'args1': '-m',
        'args2': '3,morph.en',
        'encoding': 'utf8',
        'exeoptions': '-w',
        'kw': {
            'bufsize': 0,
            'executable': '4,perl',
            'stdin': 'PIPE',
            'stdout': 'PIPE',
            'stderr': 'PIPE',
            'preexec_fn': None,
            'close_fds': False,
            'shell': False,
            'cwd': None,
            'env': None,
            'universal_newlines': False,
            'startupinfo': None,
            'creationflags': 0,
            },
        'script': '3,english_stemmer.pl',
        'version': '4.0.264',
        }
    # Class-level defaults; open() replaces them with per-instance values.
    args = []
    encoding = 'utf8'
    kw = {}
    script = '3,english_stemmer.pl'
    isopen = False
    p = object
    errors = []

    def _resolve(self, coded):
        '''Resolve a coded path via the parent; record and log a miss.

        The lookup key is "<second comma-separated field of __name__>,<coded>".
        Returns whatever decodepath() returned (falsy when not found).
        '''
        found = self.p.decodepath('%s,%s'%(__name__.split(',')[1],coded),True)
        if not found:
            self.errors.append([__name__,'missing',coded])
            logger.error('%s\t%s',*self.errors[-1][1:])
        return found

    def open(self,parent,cfg):
        '''Resolve paths from cfg and start the child process.

        parent -- object providing decodepath() and cfoutput (see class
                  docstring).  It is stored as self.p; passing None keeps
                  whatever self.p already is.
        cfg    -- configuration dict shaped like the class-level ``cfg``.

        Raises RuntimeError if the child process cannot be started.
        '''
        # The original never stored ``parent`` although every method goes
        # through self.p, whose class default is the bare ``object`` type.
        if parent is not None:
            self.p = parent
        # Give this instance its own list so that appending below does not
        # mutate the list shared by the class and all other instances.
        self.errors = list(self.errors)

        self.args = dict(cfg['args'])
        self.kw = dict(cfg['kw'])
        if cfg['script']:
            self.script = self._resolve(cfg['script'])
            self.args[0] = self.script
            # Run the child from the directory that holds the script.
            self.kw['cwd'] = os.path.dirname(self.script)
        else:
            self.script = None

        # Argument 2 is a coded path as well; resolve it, then flatten the
        # position-keyed dict into an ordered list of strings.
        self.args[2] = self._resolve(self.args[2])
        self.args = [str(self.args[k]) for k in sorted(self.args.keys())]

        if 'executable' in cfg['kw'] and cfg['kw']['executable']:
            self.kw['executable'] = self._resolve(cfg['kw']['executable'])
            # Interpreter options go in front of the script path.
            self.args.insert(0,cfg['exeoptions'] if cfg['exeoptions'] else '')
        else:
            self.kw['executable'] = None

        # Translate the textual placeholder into the real constant.
        for stream in ('stdin','stdout','stderr'):
            configured = cfg['kw'][stream]
            self.kw[stream] = subprocess.PIPE if configured == 'PIPE' else configured
        self.encoding = 'utf8' if 'utf8' in cfg['encoding'].lower().replace('-','') else cfg['encoding']

        try:
            self.subproc = subprocess.Popen(self.args,**self.kw)
        except Exception as e:
            # Keep the original message for callers, but do not lose the
            # underlying cause: put it in the log.
            logger.error('%s\t%s','failed',e)
            raise RuntimeError('failed \"[%s]\" \"%s %s\"'%(__name__,self.kw['executable'],' '.join(self.args)))

    def run(self,k):
        '''Send every line of cfoutput[k]['tempbuff'] through the child.

        Each line is stripped, encoded with self.encoding and written to
        the child's stdin followed by a newline; exactly one line is then
        read from the child's stdout, right-stripped, decoded and stored
        back at the same index.
        '''
        buff = self.p.cfoutput[k]['tempbuff']
        for i,line in enumerate(buff):
            # Concatenate bytes with bytes: '%s\n' % <bytes> would write the
            # "b'...'" repr on Python 3.  On Python 2 the result is identical
            # to the original formatting.
            self.subproc.stdin.write(line.strip().encode(self.encoding) + b'\n')
            # The child must see the line before we block on readline().
            self.subproc.stdin.flush()
            buff[i] = self.subproc.stdout.readline().rstrip().decode(self.encoding)

    def flush(self,k):
        '''Nothing is buffered by this filter, so there is nothing to flush.'''
        return

    def close(self):
        '''Shut down the child process started by open(), if any.

        communicate() closes the child's stdin, drains stdout/stderr and
        waits for the child to exit, so the pipes and the process are
        released.  NOTE(review): this assumes the script terminates when
        its stdin reaches end-of-file -- confirm for custom scripts.
        Safe to call when open() was never called or failed.
        '''
        proc = getattr(self,'subproc',None)
        if proc is None:
            return
        self.subproc = None
        try:
            proc.communicate()
        except (OSError,IOError,ValueError) as e:
            # Best effort: a pipe that is already closed must not turn
            # shutdown into a crash.
            logger.warning('%s\t%s','close',e)
 
100
 
 
101
def usage():
    '''Return the command-prompt help text.

    The text names the running script (basename of sys.argv[0]) and shows
    the import statement using that name without its extension.
    '''
    script_name = os.path.basename(sys.argv[0])
    module_name = os.path.splitext(script_name)[0]
    template = "\n%s\n\tUsage:\n\tfrom %s import filter\n"
    return template % (script_name, module_name)
 
107
 
 
108
licensetxt=u'''CorpusFiltergraph™ v4.0
 
109
Copyright © 2010-2012 Precision Translation Tools Co., Ltd.
 
110
 
 
111
This program is free software: you can redistribute it and/or modify
 
112
it under the terms of the GNU Lesser General Public License as published by
 
113
the Free Software Foundation, either version 3 of the License, or
 
114
(at your option) any later version.
 
115
 
 
116
This program is distributed in the hope that it will be useful,
 
117
but WITHOUT ANY WARRANTY; without even the implied warranty of
 
118
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
119
GNU Lesser General Public License for more details.
 
120
 
 
121
You should have received a copy of the GNU Lesser General Public License
 
122
along with this program.  If not, see http://www.gnu.org/licenses/.
 
123
 
 
124
For more information, please contact Precision Translation Tools Co., Ltd.
 
125
at: http://www.precisiontranslationtools.com'''
 
126
 
 
127
if __name__ == "__main__":
    import os
    import sys
    # Print the help text when the module is executed directly.
    # usage() already returns a native string, so it is written as-is:
    # encoding it first forces an implicit ASCII decode on Python 2 (which
    # fails for a non-ASCII script path) and yields bytes that cannot be
    # concatenated with '\n' on Python 3.
    sys.stdout.write(usage() + '\n')