~domy/domyce/trunk

« back to all changes in this revision

Viewing changes to lib/corpusfg/demo-2.sh

  • Committer: Tom Hoar
  • Date: 2013-04-28 12:13:05 UTC
  • Revision ID: tahoar@precisiontranslationtools.com-20130428121305-pm7dryk7hlsbenda
Final updates before jtv updates. Cleaned up conf files, renamed default.conf to default.ini, and updated demo-2.bat and demo-2.sh to use the new file names. Updated binarize-phrasetable.py to be compatible with Moses RELEASE-1.0. Removed /home/tahoar, /home/tahoar, and  checks in train-lm.py. Referenced mgiza instead of the older mgizapp in train-tables.py.

Show diffs side-by-side

added added

removed removed

Lines of Context:
22
22
usage(){
23
23
        cat <<- HELP
24
24
        Usage: ${0##*/} [OPTION...]
25
 
        Run the second demo wizard
 
25
        Run the second DoMY wizard
26
26
        
27
27
        Options:
28
28
          -c       continue without pausing to read prompts
29
29
          -s       skips $BASE train step
30
30
          -t       set timer for training graphs
31
 
          -r       reuse previous corpus (train only)
 
31
          -r1      reuse micro corpus (train micro corpus only)
 
32
          -r2      reuse domy corpus (train domy corpus only)
32
33
          -h       this help text
33
34
        
34
35
        Examples:
37
38
        
38
39
          ${0##*/} -s -c
39
40
             This runs all data preparation and translation without
40
 
             retraining the translation models.
 
41
             retraining the SMT models.
41
42
        
42
43
        HELP
43
44
        exit $1
44
45
        }
45
46
 
 
47
clean(){
 
48
        [ -d "$rootfolder/CORPORA/ready_micro/" ] && rm -rf "$rootfolder/CORPORA/ready_micro/"
 
49
        [ -d "$rootfolder/CORPORA/ready_micro-workbench/" ] && rm -rf "$rootfolder/CORPORA/ready_micro-workbench/"
 
50
        [ -d "$rootfolder/CORPORA/ready_domy/" ] && rm -rf "$rootfolder/CORPORA/ready_domy/"
 
51
        [ -d "$rootfolder/CORPORA/ready_domy-workbench/" ] && rm -rf "$rootfolder/CORPORA/ready_domy-workbench/"
 
52
#       [ -d "$rootfolder/BUILDS/lm/micro_lm/" ] && rm -rf "$rootfolder/BUILDS/lm/micro_lm/"
 
53
#       [ -d "$rootfolder/BUILDS/tm/micro_tm/" ] && rm -rf "$rootfolder/BUILDS/tm/micro_tm/"
 
54
#       [ -d "$rootfolder/BUILDS/lm/domy_lm/" ] && rm -rf "$rootfolder/BUILDS/lm/domy_lm/"
 
55
#       [ -d "$rootfolder/BUILDS/tm/domy_tm/" ] && rm -rf "$rootfolder/BUILDS/tm/domy_tm/"
 
56
        exit 0
 
57
        }
 
58
 
46
59
[[ "${args[@]}" =~ "-h" ]] && usage 0
47
60
[[ "${args[@]}" =~ "--help" ]] && usage 0
48
61
 
70
83
if [ ! -d "$(dirname $(dirname ${DOMY}))/share/corpusfg" ] ; then
71
84
        rootfolder=$(dirname $(dirname ${DOMY}))/share/domy
72
85
        fi
 
86
 
 
87
if [[ "${args[@]}" =~ "-clean" ]] ; then
 
88
        clean
 
89
        fi
 
90
 
73
91
if [ -f "$userfiles/$BASE.ini" ] ; then
74
92
        while read line ; do
75
93
                [[ "${line}" =~ "=" ]] && eval "${line// /}" 2>/dev/null
100
118
 
101
119
clear
102
120
 
103
 
if [[ ! "${args[@]}" =~ "-r" ]]; then
 
121
if [[ ! "${args[@]}" =~ "-r1" ]] ; then
 
122
if [[ ! "${args[@]}" =~ "-r2" ]] ; then
104
123
        echo "Introduction:"
105
124
        echo
106
125
        echo "This second demo runs example \"GRAPHS\" 6 to 14 to consolidate data,"
107
 
        echo "train translation models, evaluate translation models and translate"
108
 
        echo "documents using both translation models."
 
126
        echo "train two SMT models, evaluate SMT models and"
 
127
        echo "translate a document using both SMT models."
109
128
        echo
110
129
        echo "Example 6 cleans a micro corpus for use as SMT training corpus"
111
130
        echo "Example 7 creates the lm BUILD set named \"micro_lm\""
112
131
        echo "Example 8 creates the tm BUILD set named \"micro_tm\""
113
 
        echo "Example 9 trains, tunes, evaluates a \"micro\" translation model"
114
 
        echo "Example 10 cleans a demo corpus from same data with different options"
115
 
        echo "Example 11 creates the lm BUILD set named \"demo_lm\""
116
 
        echo "Example 12 creates the tm BUILD set named \"demo_tm\""
117
 
        echo "Example 13 trains, tunes, evaluates a \"demo\" translation model"
 
132
        echo "Example 9 trains, tunes, evaluates a \"micro\" SMT model"
 
133
        echo "Example 10 cleans a domy corpus from same data with different options"
 
134
        echo "Example 11 creates the lm BUILD set named \"domy_lm\""
 
135
        echo "Example 12 creates the tm BUILD set named \"domy_tm\""
 
136
        echo "Example 13 trains, tunes, evaluates a \"domy\" SMT model"
118
137
        echo "Example 14 translates sample document with both models"
119
138
        echo
120
139
        echo "You can find the GRAPHs and their config.ini files in:"
171
190
                echo "Review"
172
191
                echo "Command:  ~\$ $BASE clean-tm"
173
192
                echo "Inputs:   \"file://$rootfolder/CORPORA/sa/EuroParl%20v5/Legislation/debates/tm/nl/\""
174
 
                echo "Outputs1: \"file://$rootfolder/CORPORA.demo/ready_micro/EuroParl%20v5/Legislation/debates/tm/nl/\""
175
 
                echo "Outputs2: \"file://$rootfolder/CORPORA.demo/ready_micro-workbench/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
193
                echo "Outputs1: \"file://$rootfolder/CORPORA/ready_micro/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
194
                echo "Outputs2: \"file://$rootfolder/CORPORA/ready_micro-workbench/EuroParl%20v5/Legislation/debates/tm/nl/\""
176
195
                echo
177
196
        else
178
197
                echo "\"clean-tm\" micro configuration is not installed."
185
204
 
186
205
        echo "Example 7: create an lm BUILD set named \"micro_lm\""
187
206
        echo
188
 
        echo "Training a translation model requires a language model \"BUILD\" set"
 
207
        echo "Training an SMT model requires a language model \"BUILD\" set"
189
208
        echo "created. here, and a tm \"BUILD\" set created in Examples 10 & 11 below."
190
209
        echo "This GRAPH selects and consolidates target language data to create both"
191
210
        echo "a language model and to train a \"recaser\" model."
210
229
                echo
211
230
                echo "Review"
212
231
                echo "Command:  ~\$ $BASE build-lm"
213
 
                echo "Inputs1:  \"file://$rootfolder/CORPORA.demo/ready_demo/EuroParl%20v5/Legislation/debates/tm/nl/\""
214
 
                echo "Inputs2:  \"file://$rootfolder/CORPORA.demo/ready_demo-workbench/EuroParl%20v5/Legislation/debates/tm/nl/\""
215
 
                echo "Inputs3:  \"file://$rootfolder/CORPORA.demo/ready_demo-workbench/EuroParl%20v5/Legislation/debates/lm/nl/\""
 
232
                echo "Inputs1:  \"file://$rootfolder/CORPORA/ready_domy/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
233
                echo "Inputs2:  \"file://$rootfolder/CORPORA/ready_domy-workbench/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
234
                echo "Inputs3:  \"file://$rootfolder/CORPORA/ready_domy-workbench/EuroParl%20v5/Legislation/debates/lm/nl/\""
216
235
                echo "Output:   \"file://$rootfolder/BUILDS/lm/micro_lm/\""
217
236
                echo
218
237
        else
226
245
 
227
246
        echo "Example 8: create a tm BUILD set named \"micro_tm\""
228
247
        echo
229
 
        echo "The example GRAPHS in demo-1.sh processed all data in their original,"
 
248
        echo "The example GRAPHS clean-tm processed all data in their original,"
230
249
        echo "separate files. This allows users to maintain data categories during"
231
250
        echo "processing. The separate data files must be consolidated before"
232
251
        echo "training. The \"BUILD\" process selects, consolidates the data. This"
233
252
        echo "GRAPH creates a tm \"BUILD\" set with 12 files from the \"micro\""
234
 
        echo "SMT data set created in demo-1.sh. This GRAPH's config file defines:"
 
253
        echo "SMT data set created by clean-tm. This GRAPH's config file defines:"
235
254
        echo "     mertset = 300"
236
255
        echo "     evalset = 300"
237
256
        echo
257
276
                echo
258
277
                echo "Review"
259
278
                echo "Command:  ~\$ $BASE build-tm"
260
 
                echo "Inputs:   \"file://$rootfolder/CORPORA.demo/ready_demo/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
279
                echo "Inputs:   \"file://$rootfolder/CORPORA/ready_domy/EuroParl%20v5/Legislation/debates/tm/nl/\""
261
280
                echo "Outputs:  \"file://$rootfolder/BUILDS/tm/micro_tm\""
262
281
                echo
263
282
        else
265
284
                echo
266
285
                fi
267
286
 
268
 
        echo "Next: Example 9, train a translation model with BUILD sets named \"micro_tm\" and \"micro_lm\""
 
287
        echo "Next: Example 9, train an SMT model with BUILD sets named \"micro_tm\" and \"micro_lm\""
269
288
        [[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" continue..."
270
289
        clear
271
290
 
272
 
        echo "Example 9: train a translation model with BUILD sets named"
 
291
        echo "Example 9: train an SMT model with BUILD sets named"
273
292
        echo "            \"micro_tm\" and \"micro_lm\""
274
293
        echo
275
294
        echo "The training process consists of five sub-steps:"
276
295
        echo "   1) train-lm      - trains the language model"
277
296
        echo "   2) train-tm      - trains the phrase and reordering tables, then"
278
297
        echo "                      binarizes the tables This takes a long time."
279
 
        echo "   3) train-mert    - \"Minimum Error Rate Tuning\" creates a translation"
 
298
        echo "   3) train-mert    - \"Minimum Error Rate Tuning\" creates an SMT"
280
299
        echo "                      model consisting of phrase and reordering tables,"
281
300
        echo "                      a language model and configuration file with"
282
301
        echo "                      optimal settings. This takes a long time."
283
 
        echo "   4) train-eval    - evaluates the \"tuned\" translation model"
 
302
        echo "   4) train-eval    - evaluates the \"tuned\" SMT model"
284
303
        echo "   5) train-recaser - trains a recaser model that restores"
285
304
        echo "                      upper/lower case to translations"
286
305
        echo
289
308
        [[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" to start Example 9..."
290
309
        clear
291
310
        fi
292
 
 
293
 
 
294
 
 
295
 
 
296
 
echo "Example 9: train a translation model with BUILD sets named"
297
 
echo "            \"micro_tm\" and \"micro_lm\""
298
 
echo
299
 
echo "Command:  ~\$ $BASE train"
300
 
echo
301
 
if [ -f "$graphs/train/Demo - train, tune, eval nl-en SMT model with 'micro' corpus.conf" ]; then
302
 
 
303
 
 
304
 
        if [[ ! "${args[@]}" =~ "-s" ]]; then
305
 
                cp "$graphs/train/config.ini" $graphs/train/.config.ini.tmp
306
 
                cp "$graphs/train/Demo - train, tune, eval nl-en SMT model with 'micro' corpus.conf" $graphs/train/config.ini
307
 
                [ -n "${time_prefix}" ] && time_prefix="${time_bin} -po $userfiles/time-micro.log "
308
 
                ${time_prefix} python $cfgm -v train
309
 
                result=$?
310
 
                mv $graphs/train/.config.ini.tmp $graphs/train/config.ini
311
 
                [ $result -ne 0 ] && leave $result
312
 
                echo
313
 
                echo "Review"
314
 
                echo "Command:  ~\$ $BASE train"
315
 
                echo "Input1:   \"file://$rootfolder/BUILDS/lm/micro_lm\""
316
 
                echo "Input2:   \"file://$rootfolder/BUILDS/tm/micro_tm\""
317
 
                echo "Output:  \"file://$rootfolder/ENGINES/evals/eval-s=nl-t=en-p=micro_tm-a=giza-g=3-l=micro_lm-T=irstlm-n=3/moses-mert.mteval-v12.pl.txt\""
 
311
        fi
 
312
 
 
313
 
 
314
 
 
315
if [[ ! "${args[@]}" =~ "-r2" ]] ; then
 
316
        echo "Example 9: train an SMT model with BUILD sets named"
 
317
        echo "            \"micro_tm\" and \"micro_lm\""
 
318
        echo
 
319
        echo "Command:  ~\$ $BASE train"
 
320
        echo
 
321
        if [ -f "$graphs/train/Demo - train, tune, eval nl-en SMT model with 'micro' corpus.conf" ]; then
 
322
 
 
323
                if [[ ! "${args[@]}" =~ "-s" ]] ; then
 
324
                        cp "$graphs/train/config.ini" $graphs/train/.config.ini.tmp
 
325
                        cp "$graphs/train/Demo - train, tune, eval nl-en SMT model with 'micro' corpus.conf" $graphs/train/config.ini
 
326
                        [ -n "${time_prefix}" ] && time_prefix="${time_bin} -po $userfiles/time-micro.log "
 
327
                        ${time_prefix} python $cfgm -v train
 
328
                        result=$?
 
329
                        mv $graphs/train/.config.ini.tmp $graphs/train/config.ini
 
330
                        [ $result -ne 0 ] && leave $result
 
331
                        echo
 
332
                        echo "Review"
 
333
                        echo "Command:  ~\$ $BASE train"
 
334
                        echo "Input1:   \"file://$rootfolder/BUILDS/lm/micro_lm\""
 
335
                        echo "Input2:   \"file://$rootfolder/BUILDS/tm/micro_tm\""
 
336
                        echo "Output:  \"file://$rootfolder/ENGINES/evals/eval-s=nl-t=en-p=micro_tm-a=giza-g=3-l=micro_lm-T=irstlm-n=3/moses-mert.mteval-v12.pl.txt\""
 
337
                        echo
 
338
                        fi
 
339
                [[ "${args[@]}" =~ "-r1" ]] && leave 0
 
340
        else
 
341
                echo "\"train\" micro configuration is not installed."
318
342
                echo
319
343
                fi
320
 
else
321
 
        echo "\"train\" micro configuration is not installed."
322
 
        echo
323
344
        fi
324
345
 
325
 
if [[ ! "${args[@]}" =~ "-r" ]]; then
326
 
        echo "Next: Example 10, prepare nl-en sample data (demo set)"
 
346
if [[ ! "${args[@]}" =~ "-r1" ]] ; then
 
347
if [[ ! "${args[@]}" =~ "-r2" ]] ; then
 
348
        echo "Next: Example 10, prepare nl-en sample data (domy set)"
327
349
        [[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" continue..."
328
350
        clear
329
351
 
330
 
        echo "Example 10: prepare nl-en sample data (demo set)"
 
352
        echo "Example 10: prepare nl-en sample data (domy set)"
331
353
        echo
332
354
        echo "This GRAPH starts with the same data as Example 6. New \"mintoken\","
333
355
        echo "and \"maxtoken\" settings create a larger SMT training data set. This"
344
366
 
345
367
 
346
368
 
347
 
        echo "Example 10: prepare nl-en sample data (demo set)"
 
369
        echo "Example 10: prepare nl-en sample data (domy set)"
348
370
        echo
349
371
        echo "Command:  ~\$ $BASE clean-tm"
350
372
        echo
351
 
        if [ -f "$graphs/clean-tm/Demo - clean nl-en 'demo' parallel corpus.conf" ]; then
 
373
        if [ -f "$graphs/clean-tm/Demo - clean nl-en 'domy' parallel corpus.conf" ]; then
352
374
                cp "$graphs/clean-tm/config.ini" $graphs/clean-tm/.config.ini.tmp
353
 
                cp "$graphs/clean-tm/Demo - clean nl-en 'demo' parallel corpus.conf" $graphs/clean-tm/config.ini
 
375
                cp "$graphs/clean-tm/Demo - clean nl-en 'domy' parallel corpus.conf" $graphs/clean-tm/config.ini
354
376
                python $cfgm -v clean-tm
355
377
                result=$?
356
378
                mv $graphs/clean-tm/.config.ini.tmp $graphs/clean-tm/config.ini
359
381
                echo "Review"
360
382
                echo "Command:  ~\$ $BASE clean-tm"
361
383
                echo "Inputs:   \"file://$rootfolder/CORPORA/sa/EuroParl%20v5/Legislation/debates/tm/nl/\""
362
 
                echo "Outputs1: \"file://$rootfolder/CORPORA.demo/ready_demo/EuroParl%20v5/Legislation/debates/tm/nl/\""
363
 
                echo "Outputs2: \"file://$rootfolder/CORPORA.demo/ready_demo-workbench/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
384
                echo "Outputs1: \"file://$rootfolder/CORPORA/ready_domy/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
385
                echo "Outputs2: \"file://$rootfolder/CORPORA/ready_domy-workbench/EuroParl%20v5/Legislation/debates/tm/nl/\""
364
386
                echo
365
387
        else
366
 
                echo "\"clean-tm\" demo configuration is not installed."
 
388
                echo "\"clean-tm\" domy configuration is not installed."
367
389
                echo
368
390
                fi
369
391
 
370
 
        echo "Next: Example 11, create an lm BUILD set named \"demo_lm\""
 
392
        echo "Next: Example 11, create an lm BUILD set named \"domy_lm\""
371
393
        [[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" continue..."
372
394
        clear
373
395
 
374
 
        echo "Example 11: create an lm BUILD set named \"demo_lm\""
 
396
        echo "Example 11: create an lm BUILD set named \"domy_lm\""
375
397
        echo
376
 
        echo "Training a translation model requires a language model \"BUILD\" set"
 
398
        echo "Training an SMT model requires a language model \"BUILD\" set"
377
399
        echo "created. here, and a tm \"BUILD\" set created in Examples 10 & 11 below."
378
400
        echo "This GRAPH selects and consolidates target language data to create both"
379
401
        echo "a language model and to train a \"recaser\" model."
384
406
        clear
385
407
 
386
408
 
387
 
        echo "Example 11: create an lm BUILD set named \"demo_lm\""
 
409
        echo "Example 11: create an lm BUILD set named \"domy_lm\""
388
410
        echo
389
411
        echo "Command:  ~\$ $BASE build-lm"
390
412
        echo
391
 
        if [ -f "$graphs/build-lm/Demo - build nl-en 'LM BUILD set' with 'demo' corpus.conf" ]; then
 
413
        if [ -f "$graphs/build-lm/Demo - build nl-en 'LM BUILD set' with 'domy' corpus.conf" ]; then
392
414
                cp "$graphs/build-lm/config.ini" $graphs/build-lm/.config.ini.tmp
393
 
                cp "$graphs/build-lm/Demo - build nl-en 'LM BUILD set' with 'demo' corpus.conf" $graphs/build-lm/config.ini
 
415
                cp "$graphs/build-lm/Demo - build nl-en 'LM BUILD set' with 'domy' corpus.conf" $graphs/build-lm/config.ini
394
416
                python $cfgm -v build-lm
395
417
                result=$?
396
418
                mv $graphs/build-lm/.config.ini.tmp $graphs/build-lm/config.ini
398
420
                echo
399
421
                echo "Review"
400
422
                echo "Command:  ~\$ $BASE build-lm"
401
 
                echo "Inputs1:  \"file://$rootfolder/CORPORA.demo/ready_demo/EuroParl%20v5/Legislation/debates/tm/nl/\""
402
 
                echo "Inputs2:  \"file://$rootfolder/CORPORA.demo/ready_demo-workbench/EuroParl%20v5/Legislation/debates/tm/nl/\""
403
 
                echo "Inputs3:  \"file://$rootfolder/CORPORA.demo/ready_demo-workbench/EuroParl%20v5/Legislation/debates/lm/nl/\""
404
 
                echo "Output:   \"file://$rootfolder/BUILDS/lm/demo_lm/\""
 
423
                echo "Inputs1:  \"file://$rootfolder/CORPORA/ready_domy/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
424
                echo "Inputs2:  \"file://$rootfolder/CORPORA/ready_domy-workbench/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
425
                echo "Inputs3:  \"file://$rootfolder/CORPORA/ready_domy-workbench/EuroParl%20v5/Legislation/debates/lm/nl/\""
 
426
                echo "Output:   \"file://$rootfolder/BUILDS/lm/domy_lm/\""
405
427
                echo
406
428
        else
407
 
                echo "\"build-lm\" demo configuration is not installed."
 
429
                echo "\"build-lm\" domy configuration is not installed."
408
430
                echo
409
431
                fi
410
432
 
411
 
        echo "Next: Example 12, create a tm BUILD set named \"demo_tm\""
 
433
        echo "Next: Example 12, create a tm BUILD set named \"domy_tm\""
412
434
        [[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" continue..."
413
435
        clear
414
436
 
415
 
        echo "Example 12: create a tm BUILD set named \"demo_tm\""
 
437
        echo "Example 12: create a tm BUILD set named \"domy_tm\""
416
438
        echo
417
439
        echo "This GRAPH selectes and consolidates the larger SMT data set created"
418
 
        echo "in demo-1.sh and creates a tm \"BUILD\" set of 12 files. This"
 
440
        echo "in clean-tm and creates a tm \"BUILD\" set of 12 files. This"
419
441
        echo "GRAPH's config file defines:"
420
442
        echo "     mertset = 500"
421
443
        echo "     evalset = 500"
431
453
 
432
454
 
433
455
 
434
 
        echo "Example 12: create a tm BUILD set named \"demo_tm\""
 
456
        echo "Example 12: create a tm BUILD set named \"domy_tm\""
435
457
        echo
436
458
        echo "Command:  ~\$ $BASE build-tm"
437
459
        echo
438
 
        if [ -f "$graphs/build-tm/Demo - build nl-en 'TM BUILD set' with 'demo' parallel corpus.conf" ]; then
 
460
        if [ -f "$graphs/build-tm/Demo - build nl-en 'TM BUILD set' with 'domy' parallel corpus.conf" ]; then
439
461
                cp "$graphs/build-tm/config.ini" $graphs/build-tm/.config.ini.tmp
440
 
                cp "$graphs/build-tm/Demo - build nl-en 'TM BUILD set' with 'demo' parallel corpus.conf" $graphs/build-tm/config.ini
 
462
                cp "$graphs/build-tm/Demo - build nl-en 'TM BUILD set' with 'domy' parallel corpus.conf" $graphs/build-tm/config.ini
441
463
                python $cfgm -v build-tm
442
464
                result=$?
443
465
                mv $graphs/build-tm/.config.ini.tmp $graphs/build-tm/config.ini
445
467
                echo
446
468
                echo "Review"
447
469
                echo "Command:  ~\$ $BASE build-tm"
448
 
                echo "Inputs:   \"file://$rootfolder/CORPORA.demo/ready_demo/EuroParl%20v5/Legislation/debates/tm/nl/\""
449
 
                echo "Outputs:  \"file://$rootfolder/BUILDS/tm/demo_tm\""
 
470
                echo "Inputs:   \"file://$rootfolder/CORPORA/ready_domy/EuroParl%20v5/Legislation/debates/tm/nl/\""
 
471
                echo "Outputs:  \"file://$rootfolder/BUILDS/tm/domy_tm\""
450
472
                echo
451
473
        else
452
 
                echo "\"build-tm\" demo configuration is not installed."
 
474
                echo "\"build-tm\" domy configuration is not installed."
453
475
                echo
454
476
                fi
455
477
 
456
 
        echo "Next: Example 13, train a translation model with BUILD sets named \"demo_tm\" and \"demo_lm\""
 
478
        echo "Next: Example 13, train an SMT model with BUILD sets named \"domy_tm\" and \"domy_lm\""
457
479
        [[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" continue..."
458
480
        clear
459
481
 
460
 
        echo "Example 13: train a translation model with BUILD sets named"
461
 
        echo "            \"demo_tm\" and \"demo_lm\""
 
482
        echo "Example 13: train an SMT model with BUILD sets named"
 
483
        echo "            \"domy_tm\" and \"domy_lm\""
462
484
        echo
463
485
        echo "The training process consists of five sub-steps:"
464
486
        echo "   1) train-lm      - trains the language model"
468
490
        echo "                      model consisting of phrase and reordering tables,"
469
491
        echo "                      a language model and configuration file with"
470
492
        echo "                      optimal settings. This takes a long time."
471
 
        echo "   4) train-eval    - evaluates the \"tuned\" translation model"
 
493
        echo "   4) train-eval    - evaluates the \"tuned\" SMT model"
472
494
        echo "   5) train-recaser - trains a recaser model that restores"
473
495
        echo "                      upper/lower case to translations"
474
496
 
480
502
        [[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" to start Example 13..."
481
503
        clear
482
504
        fi
483
 
 
484
 
echo "Example 13: train a translation model with BUILD sets named"
485
 
echo "            \"demo_tm\" and \"demo_lm\""
486
 
echo
487
 
echo "Command:  ~\$ $BASE train"
488
 
echo
489
 
if [ -f "$graphs/train/Demo - train, tune, eval nl-en SMT model with 'demo' corpus.conf" ]; then
490
 
 
491
 
 
492
 
        if [[ ! "${args[@]}" =~ "-s" ]]; then
493
 
                cp "$graphs/train/config.ini" $graphs/train/.config.ini.tmp
494
 
                cp "$graphs/train/Demo - train, tune, eval nl-en SMT model with 'demo' corpus.conf" $graphs/train/config.ini
495
 
                [ -n "${time_prefix}" ] && time_prefix="${time_bin} -po $userfiles/time-demo.log "
496
 
                ${time_prefix} python $cfgm -v train
497
 
                result=$?
498
 
                mv $graphs/train/.config.ini.tmp $graphs/train/config.ini
499
 
                [ $result -ne 0 ] && leave $result
500
 
                echo
501
 
                echo "Review"
502
 
                echo "Command:  ~\$ $BASE train"
503
 
                echo "Input1:   \"file://$rootfolder/BUILDS/lm/demo_lm\""
504
 
                echo "Input2:   \"file://$rootfolder/BUILDS/tm/demo_tm\""
505
 
                echo "Output:  \"file://$rootfolder/ENGINES/evals/eval-s=nl-t=en-p=demo_tm-a=giza-g=3-l=demo_lm-T=irstlm-n=3/moses-mert.mteval-v12.pl.txt\""
 
505
        fi
 
506
 
 
507
if [[ ! "${args[@]}" =~ "-r1" ]] ; then
 
508
        echo "Example 13: train an SMT model with BUILD sets named"
 
509
        echo "            \"domy_tm\" and \"domy_lm\""
 
510
        echo
 
511
        echo "Command:  ~\$ $BASE train"
 
512
        echo
 
513
        if [ -f "$graphs/train/Demo - train, tune, eval nl-en SMT model with 'domy' corpus.conf" ]; then
 
514
 
 
515
                if [[ ! "${args[@]}" =~ "-s" ]] ; then
 
516
                        cp "$graphs/train/config.ini" $graphs/train/.config.ini.tmp
 
517
                        cp "$graphs/train/Demo - train, tune, eval nl-en SMT model with 'domy' corpus.conf" $graphs/train/config.ini
 
518
                        [ -n "${time_prefix}" ] && time_prefix="${time_bin} -po $userfiles/time-domy.log "
 
519
                        ${time_prefix} python $cfgm -v train
 
520
                        result=$?
 
521
                        mv $graphs/train/.config.ini.tmp $graphs/train/config.ini
 
522
                        [ $result -ne 0 ] && leave $result
 
523
                        echo
 
524
                        echo "Review"
 
525
                        echo "Command:  ~\$ $BASE train"
 
526
                        echo "Input1:   \"file://$rootfolder/BUILDS/lm/domy_lm\""
 
527
                        echo "Input2:   \"file://$rootfolder/BUILDS/tm/domy_tm\""
 
528
                        echo "Output:  \"file://$rootfolder/ENGINES/evals/eval-s=nl-t=en-p=domy_tm-a=giza-g=3-l=domy_lm-T=irstlm-n=3/moses-mert.mteval-v12.pl.txt\""
 
529
                        echo
 
530
                        fi
 
531
                [[ "${args[@]}" =~ "-r2" ]] && leave 0
 
532
        else
 
533
                echo "\"train\" domy configuration is not installed."
506
534
                echo
507
535
                fi
508
 
else
509
 
        echo "\"train\" demo configuration is not installed."
510
 
        echo
511
536
        fi
512
537
 
 
538
 
513
539
echo "Next: Example 14, translate sample document"
514
540
[[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" continue..."
515
541
clear
517
543
echo "Example 14: translate sample document"
518
544
echo
519
545
echo "This example uses the \"translate\" GRAPH to translate a sample"
520
 
echo "document using the \"demo_tm\" translation engine."
 
546
echo "document using the \"domy_tm\" translation engine."
521
547
echo
522
548
echo "Command:  ~\$ $BASE translate"
523
549
echo
528
554
echo
529
555
echo "Command:  ~\$ $BASE translate"
530
556
echo
531
 
if [ -f "$graphs/translate/Demo - translate nl-en with 'micro' and 'demo' SMT and recaser models.conf" ]; then
532
 
 
533
 
 
534
 
        if [[ ! "${args[@]}" =~ "-s" ]]; then
 
557
if [ -f "$graphs/translate/Demo - translate nl-en with 'micro' and 'domy' SMT and recaser models.ini" ]; then
 
558
 
 
559
 
 
560
        if [[ ! "${args[@]}" =~ "-s" ]] ; then
535
561
                cp "$graphs/translate/config.ini" $graphs/translate/.config.ini.tmp
536
 
                cp "$graphs/translate/Demo - translate nl-en with 'micro' and 'demo' SMT and recaser models.conf" $graphs/translate/config.ini
 
562
                cp "$graphs/translate/Demo - translate nl-en with 'micro' and 'domy' SMT and recaser models.ini" $graphs/translate/config.ini
537
563
                python $cfgm -v translate
538
564
                result=$?
539
565
                mv $graphs/translate/.config.ini.tmp $graphs/translate/config.ini
546
572
                echo
547
573
                fi
548
574
else
549
 
        echo "\"translate\" demo configuration is not installed."
 
575
        echo "\"translate\" domy configuration is not installed."
550
576
        echo
551
577
        fi
552
578
 
562
588
echo "Example 6 cleaned a micro corpus for use as SMT training corpus"
563
589
echo "Example 7 created the lm BUILD set named \"micro_lm\""
564
590
echo "Example 8 created the tm BUILD set named \"micro_tm\""
565
 
echo "Example 9 trained, tuned, evaluated a \"micro\" translation model"
566
 
echo "Example 10 cleaned a demo corpus from same data with different options"
567
 
echo "Example 11 created the lm BUILD set named \"demo_lm\""
568
 
echo "Example 12 created the tm BUILD set named \"demo_tm\""
569
 
echo "Example 13 trained, tune, evaluate a \"demo\" translation model"
 
591
echo "Example 9 trained, tuned, evaluated a \"micro\" SMT model"
 
592
echo "Example 10 cleaned a domy corpus from same data with different options"
 
593
echo "Example 11 created the lm BUILD set named \"domy_lm\""
 
594
echo "Example 12 created the tm BUILD set named \"domy_tm\""
 
595
echo "Example 13 trained, tune, evaluate a \"domy\" SMT model"
570
596
echo "Example 14 translated sample document with an engine using both models"
571
597
 
572
598
echo "Next: NOTES"
574
600
clear
575
601
 
576
602
echo "NOTE: All outputs from these examples were saved in sub-folders"
577
 
echo "under \"$rootfolder/BUILDS/\", \"$rootfolder/ENGINES/\", and"
578
 
echo "\"$rootfolder/TRANSLATIONS/\". It is safe to delete the demo"
579
 
echo "sub-folders at any time."
 
603
echo "  under \"$rootfolder/BUILDS/\", \"$rootfolder/ENGINES/\", and"
 
604
echo "  \"$rootfolder/TRANSLATIONS/\". To delete these demo files after"
 
605
echo "  running \"demo-2.sh\" in DoMY CE, run \"demo-2.sh -clean\"."
580
606
echo
581
607
echo "NOTE 2: You may want to rerun demo-2.sh. If you first delete the"
582
608
echo "  output folders, you can watch the demo create the output files"
604
630
 
605
631
leave 0
606
632
 
607
 
# DoMY™ CE v2.5.335
 
633
# DoMY™ CE v2.5.339
608
634
# Copyright © 2010-2013 Precision Translation Tools Co., Ltd.
609
635
 
610
636
# This program is free software: you can redistribute it and/or modify