~domy/domyce/trunk

« back to all changes in this revision

Viewing changes to lib/corpusfg/plugins/binarize-phrasetable.py

  • Committer: Tom Hoar
  • Date: 2013-04-28 12:13:05 UTC
  • Revision ID: tahoar@precisiontranslationtools.com-20130428121305-pm7dryk7hlsbenda
Final updates before jtv updates. Cleaned up conf files, renamed default.conf to default.ini, and updated demo-2.bat and demo-2.sh to use the new file names. Updated binarize-phrasetable.py to be compatible with Moses RELEASE-1.0. Removed /home/tahoar, /home/tahoar, and  checks in train-lm.py. Referenced mgiza instead of the older mgizapp in train-tables.py.

Show diffs side-by-side

added added

removed removed

Lines of Context:
6
6
#===============================================================================
7
7
 
8
8
#version:
9
 
#2.5.337 - bugfix for UTF-8 in values that become folder names
 
9
#2.5.339 - update for changes in the RELEASE-1.0.
10
10
 
11
11
import os
12
12
import sys
52
52
                'e': u'',
53
53
                'f': u'',
54
54
                'tmgrams': 7,
55
 
                'version': '2.5.337',
 
55
                'version': '2.5.339',
56
56
 
57
57
                'tmpdir': '',
 
58
                'no-alignment-info': False,
58
59
                'inputs': {},
59
60
                'logfile': '',
60
61
                'outputs': {},
71
72
        f = None
72
73
        tmgrams = 7
73
74
        tmpdir = ''
 
75
        no_alignment_info = False       # -- omit alignment info from the binary ttable 
74
76
        inputs = {}
75
77
        logfile = ''
76
78
        outputs = {}
107
109
 
108
110
                self.model_dir = cfg['model-dir'].replace('%(rootfolder)s',self.p.rootfolder) if cfg['model-dir'] else self.model_dir
109
111
 
 
112
                self.no_alignment_info = cfg['no-alignment-info']
 
113
 
110
114
                if not self.model_dir:
111
115
 
112
116
                        if cfg['tmbuildname']:
163
167
                        self.inputs[path] = [path]
164
168
                        self.outputs[path] = [
165
169
                                self.inputs[path][0].replace('.gz','.binphr.idx'),
166
 
                                self.inputs[path][0].replace('.gz','.binphr.srctree'),
 
170
                                self.inputs[path][0].replace('.gz','.binphr.srctree' if self.no_alignment_info else '.binphr.srctree.wa'),
167
171
                                self.inputs[path][0].replace('.gz','.binphr.srcvoc'),
168
 
                                self.inputs[path][0].replace('.gz','.binphr.tgtdata'),
 
172
                                self.inputs[path][0].replace('.gz','.binphr.tgtdata' if self.no_alignment_info else '.binphr.tgtdata.wa'),
169
173
                                self.inputs[path][0].replace('.gz','.binphr.tgtvoc'),
170
174
                                ]
171
175
 
273
277
                                        '-nscores',unicode(self.tmgrams),
274
278
                                        '-out',self.inputs[k][0].replace('.gz',''),
275
279
                                        ]
 
280
 
 
281
                                if self.no_alignment_info:
 
282
                                        args.append('-no-alignment-info')
 
283
 
276
284
                                try:
277
285
                                        self.subproc['popen'] = subprocess.Popen(args,**self.kw)
278
286
                                except Exception,e: