86
92
echo This second demo runs example "GRAPHS" 6 to 14 to consolidate data,
87
echo train translation models, evaluate translation models and translate
88
echo documents using both translation models.
93
echo train two SMT models, evaluate SMT models and
94
echo translate a document using both SMT models.
90
96
echo Example 6 cleans a micro corpus for use as SMT training corpus
91
97
echo Example 7 creates the lm BUILD set named "micro_lm"
92
98
echo Example 8 creates the tm BUILD set named "micro_tm"
93
echo Example 9 trains, tunes, evaluates a "micro" translation model [Linux only]
94
echo Example 10 cleans a demo corpus from same data with different options
95
echo Example 11 creates the lm BUILD set named "demo_lm"
96
echo Example 12 creates the tm BUILD set named "demo_tm"
97
echo Example 13 trains, tunes, evaluates a "demo" translation model [Linux only]
99
echo Example 9 trains, tunes, evaluates a "micro" SMT model [Linux only]
100
echo Example 10 cleans a domy corpus from same data with different options
101
echo Example 11 creates the lm BUILD set named "domy_lm"
102
echo Example 12 creates the tm BUILD set named "domy_tm"
103
echo Example 13 trains, tunes, evaluates a "domy" SMT model [Linux only]
98
104
echo Example 14 translates sample document with both models [Linux only]
100
106
echo You can find the GRAPHs and their config.ini files in:
152
158
echo Command: "%~d0> %BASE% clean-tm"
153
159
echo Inputs: "file://%rootfolder%\CORPORA\sa\EuroParl%20v5\Legislation\debates\tm\nl\"
154
echo Outputs1: "file://%rootfolder%\CORPORA.demo\ready_micro\EuroParl%20v5\Legislation\debates\tm\nl\"
155
echo Outputs2: "file://%rootfolder%\CORPORA.demo\ready_micro-workbench\EuroParl%20v5\Legislation\debates\tm\nl\"
160
echo Outputs1: "file://%rootfolder%\CORPORA\ready_micro\EuroParl%20v5\Legislation\debates\tm\nl\"
161
echo Outputs2: "file://%rootfolder%\CORPORA\ready_micro-workbench\EuroParl%20v5\Legislation\debates\tm\nl\"
158
164
copy /y "%graphs%\clean-tm\.config.ini.tmp" "%graphs%\clean-tm\config.ini" > nul
173
179
echo Example 7: create an lm BUILD set named "micro_lm"
175
echo Training a translation model requires a language model "BUILD" set
181
echo Training an SMT model requires a language model "BUILD" set
176
182
echo created. here, and a tm "BUILD" set created in Examples 10 and 11 below.
177
183
echo This GRAPH selects and consolidates target language data to create both
178
184
echo a language model and to train a "recaser" model.
198
204
echo Command: "%~d0> %BASE% build-lm"
199
echo Inputs1: "file://%rootfolder%\CORPORA.demo\ready_demo\EuroParl%20v5\Legislation\debates\tm\nl\"
200
echo Inputs2: "file://%rootfolder%\CORPORA.demo\ready_demo-workbench\EuroParl%20v5\Legislation\debates\tm\nl\"
201
echo Inputs3: "file://%rootfolder%\CORPORA.demo\ready_demo-workbench\EuroParl%20v5\Legislation\debates\lm\nl\"
205
echo Inputs1: "file://%rootfolder%\CORPORA\ready_domy\EuroParl%20v5\Legislation\debates\tm\nl\"
206
echo Inputs2: "file://%rootfolder%\CORPORA\ready_domy-workbench\EuroParl%20v5\Legislation\debates\tm\nl\"
207
echo Inputs3: "file://%rootfolder%\CORPORA\ready_domy-workbench\EuroParl%20v5\Legislation\debates\lm\nl\"
202
208
echo Output: "file://%rootfolder%\BUILDS\lm\micro_lm\"
220
226
echo Example 8: create a tm BUILD set named "micro_tm"
222
echo The example GRAPHS in demo-1.sh processed all data in their original,
228
echo The example GRAPHS in clean-tm processed all data in their original,
223
229
echo separate files. This allows users to maintain data categories during
224
230
echo processing. The separate data files must be consolidated before
225
231
echo training. The "BUILD" process selects, consolidates the data. This
226
232
echo GRAPH creates a tm "BUILD" set with 12 files from the "micro"
227
echo SMT data set created in demo-1.sh. This GRAPH's config file defines:
233
echo SMT data set created by clean-tm. This GRAPH's config file defines:
228
234
echo mertset = 300
229
235
echo evalset = 300
249
255
echo Command: "%~d0> %BASE% build-tm"
250
echo Inputs: "file://%rootfolder%\CORPORA.demo\ready_demo\EuroParl%20v5\Legislation\debates\tm\nl\"
256
echo Inputs: "file://%rootfolder%\CORPORA\ready_domy\EuroParl%20v5\Legislation\debates\tm\nl\"
251
257
echo Outputs: "file://%rootfolder%\BUILDS\tm\micro_tm"
266
echo Next: Example 9, train a translation model with BUILD sets named "micro_tm" and "micro_lm"
272
echo Next: Example 9, train an SMT model with BUILD sets named "micro_tm" and "micro_lm"
267
273
if not "%1" == "-c" pause
270
echo Example 9: train a translation model with BUILD sets named
276
echo Example 9: train an SMT model with BUILD sets named
271
277
echo "micro_tm" and "micro_lm"
273
279
echo The training process consists of five sub-steps:
274
280
echo 1. train-lm - trains the language model
275
281
echo 2. train-tm - trains the phrase and reordering tables, then
276
282
echo binarizes the tables This takes a long time.
277
echo 3. train-mert - "Minimum Error Rate Tuning" creates a translation
283
echo 3. train-mert - "Minimum Error Rate Tuning" creates an SMT
278
284
echo model consisting of phrase and reordering tables,
279
285
echo a language model and configuration file with
280
286
echo optimal settings. This takes a long time.
281
echo 4. train-eval - evaluates the "tuned" translation model
287
echo 4. train-eval - evaluates the "tuned" SMT model
282
288
echo 5. train-recaser - trains a recaser model that restores
283
289
echo upper\lower case to translations
328
echo Next: Example 10, prepare nl-en sample data [demo set]
334
echo Next: Example 10, prepare nl-en sample data [domy set]
329
335
if not "%1" == "-c" pause
332
echo Example 10: prepare nl-en sample data [demo set]
338
echo Example 10: prepare nl-en sample data [domy set]
334
340
echo This GRAPH starts with the same data as Example 6. New "mintoken",
335
341
echo and "maxtoken" settings create a larger SMT training data set. This
349
echo Example 10: prepare nl-en sample data [demo set]
355
echo Example 10: prepare nl-en sample data [domy set]
351
357
echo Command: "%~d0> %BASE% clean-tm"
353
if exist "%graphs%\clean-tm\Demo - clean nl-en 'demo' parallel corpus.conf" (
359
if exist "%graphs%\clean-tm\Demo - clean nl-en 'domy' parallel corpus.conf" (
354
360
copy /y "%graphs%\clean-tm\config.ini" "%graphs%\clean-tm\.config.ini.tmp" > nul
355
copy /y "%graphs%\clean-tm\Demo - clean nl-en 'demo' parallel corpus.conf" "%graphs%\clean-tm\config.ini" > nul
361
copy /y "%graphs%\clean-tm\Demo - clean nl-en 'domy' parallel corpus.conf" "%graphs%\clean-tm\config.ini" > nul
356
362
python "%cfgm%" -v clean-tm
357
363
if errorlevel 1 (
362
368
echo Command: "%~d0> %BASE% clean-tm"
363
369
echo Inputs: "file://%rootfolder%\CORPORA\sa\EuroParl%20v5\Legislation\debates\tm\nl\"
364
echo Outputs1: "file://%rootfolder%\CORPORA.demo\ready_demo\EuroParl%20v5\Legislation\debates\tm\nl\"
365
echo Outputs2: "file://%rootfolder%\CORPORA.demo\ready_demo-workbench\EuroParl%20v5\Legislation\debates\tm\nl\"
370
echo Outputs1: "file://%rootfolder%\CORPORA\ready_domy\EuroParl%20v5\Legislation\debates\tm\nl\"
371
echo Outputs2: "file://%rootfolder%\CORPORA\ready_domy-workbench\EuroParl%20v5\Legislation\debates\tm\nl\"
368
374
copy /y "%graphs%\clean-tm\.config.ini.tmp" "%graphs%\clean-tm\config.ini" > nul
369
375
del /q "%graphs%\clean-tm\.config.ini.tmp"
371
echo "clean-tm" demo configuration is not installed.
377
echo "clean-tm" domy configuration is not installed.
374
380
if not %result%==0 (
379
echo Next: Example 11, create an lm BUILD set named "demo_lm"
385
echo Next: Example 11, create an lm BUILD set named "domy_lm"
380
386
if not "%1" == "-c" pause
383
echo Example 11: create an lm BUILD set named "demo_lm"
389
echo Example 11: create an lm BUILD set named "domy_lm"
385
echo Training a translation model requires a language model "BUILD" set
391
echo Training an SMT model requires a language model "BUILD" set
386
392
echo created. here, and a tm "BUILD" set created in Examples 10 and 11 below.
387
393
echo This GRAPH selects and consolidates target language data to create both
388
394
echo a language model and to train a "recaser" model.
396
echo Example 11: create an lm BUILD set named "demo_lm"
402
echo Example 11: create an lm BUILD set named "domy_lm"
398
404
echo Command: "%~d0> %BASE% build-lm"
400
if exist "%graphs%\build-lm\Demo - build nl-en 'LM BUILD set' with 'demo' corpus.conf" (
406
if exist "%graphs%\build-lm\Demo - build nl-en 'LM BUILD set' with 'domy' corpus.conf" (
401
407
copy /y "%graphs%\build-lm\config.ini" "%graphs%\build-lm\.config.ini.tmp" > nul
402
copy /y "%graphs%\build-lm\Demo - build nl-en 'LM BUILD set' with 'demo' corpus.conf" "%graphs%\build-lm\config.ini" > nul
408
copy /y "%graphs%\build-lm\Demo - build nl-en 'LM BUILD set' with 'domy' corpus.conf" "%graphs%\build-lm\config.ini" > nul
403
409
python "%cfgm%" -v build-lm
404
410
if errorlevel 1 (
409
415
echo Command: "%~d0> %BASE% build-lm"
410
echo Inputs1: "file://%rootfolder%\CORPORA.demo\ready_demo\EuroParl%20v5\Legislation\debates\tm\nl\"
411
echo Inputs2: "file://%rootfolder%\CORPORA.demo\ready_demo-workbench\EuroParl%20v5\Legislation\debates\tm\nl\"
412
echo Inputs3: "file://%rootfolder%\CORPORA.demo\ready_demo-workbench\EuroParl%20v5\Legislation\debates\lm\nl\"
413
echo Output: "file://%rootfolder%\BUILDS\lm\demo_lm\"
416
echo Inputs1: "file://%rootfolder%\CORPORA\ready_domy\EuroParl%20v5\Legislation\debates\tm\nl\"
417
echo Inputs2: "file://%rootfolder%\CORPORA\ready_domy-workbench\EuroParl%20v5\Legislation\debates\tm\nl\"
418
echo Inputs3: "file://%rootfolder%\CORPORA\ready_domy-workbench\EuroParl%20v5\Legislation\debates\lm\nl\"
419
echo Output: "file://%rootfolder%\BUILDS\lm\domy_lm\"
419
425
copy /y "%graphs%\build-lm\.config.ini.tmp" "%graphs%\build-lm\config.ini" > nul
420
426
del /q "%graphs%\build-lm\.config.ini.tmp"
422
echo "build-lm" demo configuration is not installed.
428
echo "build-lm" domy configuration is not installed.
425
431
if not %result%==0 (
430
echo Next: Example 12, create a tm BUILD set named "demo_tm"
436
echo Next: Example 12, create a tm BUILD set named "domy_tm"
431
437
if not "%1" == "-c" pause
434
echo Example 12: create a tm BUILD set named "demo_tm"
440
echo Example 12: create a tm BUILD set named "domy_tm"
436
442
echo This GRAPH selectes and consolidates the larger SMT data set created
437
echo in demo-1.sh and creates a tm "BUILD" set of 12 files. This
443
echo in clean-tm and creates a tm "BUILD" set of 12 files. This
438
444
echo GRAPH's config file defines:
439
445
echo mertset = 500
440
446
echo evalset = 500
453
echo Example 12: create a tm BUILD set named "demo_tm"
459
echo Example 12: create a tm BUILD set named "domy_tm"
455
461
echo Command: "%~d0> %BASE% build-tm"
457
if exist "%graphs%\build-tm\Demo - build nl-en 'TM BUILD set' with 'demo' parallel corpus.conf" (
463
if exist "%graphs%\build-tm\Demo - build nl-en 'TM BUILD set' with 'domy' parallel corpus.conf" (
458
464
copy /y "%graphs%\build-tm\config.ini" "%graphs%\build-tm\.config.ini.tmp" > nul
459
copy /y "%graphs%\build-tm\Demo - build nl-en 'TM BUILD set' with 'demo' parallel corpus.conf" "%graphs%\build-tm\config.ini" > nul
465
copy /y "%graphs%\build-tm\Demo - build nl-en 'TM BUILD set' with 'domy' parallel corpus.conf" "%graphs%\build-tm\config.ini" > nul
460
466
python "%cfgm%" -v build-tm
461
467
if errorlevel 1 (
466
472
echo Command: "%~d0> %BASE% build-tm"
467
echo Inputs: "file://%rootfolder%\CORPORA.demo\ready_demo\EuroParl%20v5\Legislation\debates\tm\nl\"
468
echo Outputs: "file://%rootfolder%\BUILDS\tm\demo_tm"
473
echo Inputs: "file://%rootfolder%\CORPORA\ready_domy\EuroParl%20v5\Legislation\debates\tm\nl\"
474
echo Outputs: "file://%rootfolder%\BUILDS\tm\domy_tm"
471
477
copy /y "%graphs%\build-tm\.config.ini.tmp" "%graphs%\build-tm\config.ini" > nul
472
478
del /q "%graphs%\build-tm\.config.ini.tmp"
474
echo "build-tm" demo configuration is not installed.
480
echo "build-tm" domy configuration is not installed.
477
483
if not %result%==0 (
482
echo Next: Example 13, train a translation model with BUILD sets named "demo_tm" and "demo_lm"
488
echo Next: Example 13, train an SMT model with BUILD sets named "domy_tm" and "domy_lm"
483
489
if not "%1" == "-c" pause
486
echo Example 13: train a translation model with BUILD sets named
487
echo "demo_tm" and "demo_lm"
492
echo Example 13: train an SMT model with BUILD sets named
493
echo "domy_tm" and "domy_lm"
489
495
echo The training process consists of five sub-steps:
490
496
echo 1. train-lm - trains the language model
496
502
echo optimal settings. This takes a long time.
497
503
echo 5. train-recaser - trains a recaser model that restores
498
504
echo upper/lower case to translations
499
echo 4. train-eval - evaluates the "tuned" translation model
505
echo 4. train-eval - evaluates the "tuned" SMT model
506
512
if not "%1" == "-c" pause
509
echo Example 13: train a translation model with BUILD sets named
510
echo "demo_tm" and "demo_lm"
515
echo Example 13: train an SMT model with BUILD sets named
516
echo "domy_tm" and "domy_lm"
512
518
echo Command: "%~d0> %BASE% train"
514
if exist "%graphs%\train\Demo - train, tune, eval nl-en SMT model with 'demo' corpus.conf" (
520
if exist "%graphs%\train\Demo - train, tune, eval nl-en SMT model with 'domy' corpus.conf" (
515
521
echo Can not run Example 13 on MS Windows
517
523
rem if not "%1" == "-s" (
518
524
rem copy /y "%graphs%\train\config.ini" "%graphs%\train\.config.ini.tmp" > nul
519
rem copy /y "%graphs%\train\Demo - train, tune, eval nl-en SMT model with 'demo' corpus.conf" "%graphs%\train\config.ini" > nul
525
rem copy /y "%graphs%\train\Demo - train, tune, eval nl-en SMT model with 'domy' corpus.conf" "%graphs%\train\config.ini" > nul
520
526
rem python "%cfgm%" -v train
521
527
rem if errorlevel 1 (
526
532
rem echo Command: "%~d0> %BASE% train"
527
rem echo Input1: "file://%rootfolder%\BUILDS\lm\demo_lm"
528
rem echo Input2: "file://%rootfolder%\BUILDS\tm\demo_tm"
529
rem echo Output: "file://%rootfolder%\ENGINES\evals\eval-s=nl-t=en-p=demo_tm-a=giza-g=3-l=demo_lm-T=irstlm-n=3\moses-mert.mteval-v12.pl.txt"
533
rem echo Input1: "file://%rootfolder%\BUILDS\lm\domy_lm"
534
rem echo Input2: "file://%rootfolder%\BUILDS\tm\domy_tm"
535
rem echo Output: "file://%rootfolder%\ENGINES\evals\eval-s=nl-t=en-p=domy_tm-a=giza-g=3-l=domy_lm-T=irstlm-n=3\moses-mert.mteval-v12.pl.txt"
532
538
rem copy /y "%graphs%\train\.config.ini.tmp" "%graphs%\train\config.ini" > nul
533
539
rem del /q "%graphs%\train\.config.ini.tmp"
536
echo "train" demo configuration is not installed.
542
echo "train" domy configuration is not installed.
539
545
if not %result%==0 (
548
554
echo Example 14: translate sample document
550
556
echo This example uses the "translate" GRAPH to translate a sample
551
echo document using the "demo_tm" translation engine.
557
echo document using the "domy_tm" translation engine.
553
559
echo Command: "%~d0> %BASE% translate"
560
566
echo Command: "%~d0> %BASE% translate"
562
if exist "%graphs%\translate\Demo - translate nl-en with 'micro' and 'demo' SMT and recaser models.conf" (
568
if exist "%graphs%\translate\Demo - translate nl-en with 'micro' and 'domy' SMT and recaser models.conf" (
563
569
echo Can not run Example 14 on MS Windows
565
571
rem copy /y "%graphs%\translate\config.ini" "%graphs%\translate\.config.ini.tmp" > nul
566
rem copy /y "%graphs%\translate\Demo - translate nl-en with 'micro' and 'demo' SMT and recaser models.conf" "%graphs%\translate\config.ini" > nul
572
rem copy /y "%graphs%\translate\Demo - translate nl-en with 'micro' and 'domy' SMT and recaser models.ini" "%graphs%\translate\config.ini" > nul
567
573
rem python "%cfgm%" -v translate
568
574
rem if errorlevel 1 (
579
585
rem copy /y "%graphs%\translate\.config.ini.tmp" "%graphs%\translate\config.ini" > nul
580
586
rem del /q "%graphs%\translate\.config.ini.tmp"
582
echo "translate" demo configuration is not installed.
588
echo "translate" domy configuration is not installed.
585
591
if not %result%==0 (
599
605
echo Example 6 cleaned a micro corpus for use as SMT training corpus
600
606
echo Example 7 created the lm BUILD set named "micro_lm"
601
607
echo Example 8 created the tm BUILD set named "micro_tm"
602
echo Example 9 trained, tuned, evaluated a "micro" translation model [Linux only]
603
echo Example 10 cleaned a demo corpus from same data with different options
604
echo Example 11 created the lm BUILD set named "demo_lm"
605
echo Example 12 created the tm BUILD set named "demo_tm"
606
echo Example 13 trained, tune, evaluate a "demo" translation model [Linux only]
608
echo Example 9 trained, tuned, evaluated a "micro" SMT model [Linux only]
609
echo Example 10 cleaned a domy corpus from same data with different options
610
echo Example 11 created the lm BUILD set named "domy_lm"
611
echo Example 12 created the tm BUILD set named "domy_tm"
612
echo Example 13 trained, tune, evaluate a "domy" SMT model [Linux only]
607
613
echo Example 14 translated sample document with an engine using both models [Linux only]
613
619
echo NOTE: All outputs from these examples were saved in sub-folders
614
echo under "%rootfolder%\BUILDS\", "%rootfolder%\ENGINES\", and
615
echo "%rootfolder%\TRANSLATIONS\". It is safe to delete the demo
616
echo sub-folders at any time.
620
echo under "%rootfolder%\BUILDS\", "%rootfolder%\ENGINES\", and
621
echo "%rootfolder%\TRANSLATIONS\". To delete these demo files after
622
echo running "demo-2.bat" in DoMY CE, run "demo-2.bat -clean".
618
echo NOTE 2: You may want to rerun demo-2.sh. If you first delete the
624
echo NOTE 2: You may want to rerun demo-2.bat. If you first delete the
619
625
echo output folders, you can watch the demo create the output files
620
626
echo and review the results of each example before proceeding to
621
627
echo the next example. If you do not delete the output, the sub-steps
647
653
if %deleteplugins%==1 ( rd /s /q "%plugins%" 2>nul )
657
if exist "%rootfolder%\CORPORA\ready_micro\" if exist "%rootfolder%\" rd /s /q "%rootfolder%\CORPORA\ready_micro\"
658
if exist "%rootfolder%\CORPORA\ready_micro-workbench\" rd /s /q "%rootfolder%\CORPORA\ready_micro-workbench\"
659
if exist "%rootfolder%\CORPORA\ready_demo\" rd /s /q "%rootfolder%\CORPORA\ready_demo\"
660
if exist "%rootfolder%\CORPORA\ready_demo-workbench\" rd /s /q "%rootfolder%\CORPORA\ready_demo-workbench\"
661
rem if exist "%rootfolder%\BUILDS\lm\micro_lm\" rd /s /q "%rootfolder%\BUILDS\lm\micro_lm\"
662
rem if exist "%rootfolder%\BUILDS\tm\micro_tm\" rd /s /q "%rootfolder%\BUILDS\tm\micro_tm\"
663
rem if exist "%rootfolder%\BUILDS\lm\demo_lm\" rd /s /q "%rootfolder%\BUILDS\lm\demo_lm\"
664
rem if exist "%rootfolder%\BUILDS\tm\demo_tm\" rd /s /q "%rootfolder%\BUILDS\tm\demo_tm\"
651
668
:: Copyright (C) 2010-2013 Precision Translation Tools Co., Ltd.
653
670
:: This program is free software: you can redistribute it and/or modify