4
# When the script was invoked through a symlink, record the link target in DOMY.
# NOTE(review): plain readlink returns the target exactly as stored, which may
# be relative to the link's directory rather than the current one - confirm.
[ -L "${0}" ] && DOMY=$(readlink "${0}")
5
# Replace DOMY with the physical (symlink-free) path of the directory one level
# above the one named by ${DOMY%/*}, and export it. pushd/popd put the caller's
# working directory back afterwards. The expansion is quoted so that paths
# containing spaces or glob characters are not split.
pushd "${DOMY%/*}/.." >/dev/null
DOMY=$(pwd -P)
export DOMY
popd >/dev/null
8
if [ ! -x "$cfgm" ] ; then
9
echo "* $(date +%x\ %r) - ${0##*/} - installation corrupted - reinstall required"
13
if [ $? -ne 0 ] ; then echo "* $(date +%x\ %r) - ${0##*/} - configuration failure - reinstallation required" ; exit 1 ; fi
14
if [ ! -e "$HOME/domy/$BASE.ini" ] ; then echo "* $(date +%x\ %r) - ${0##*/} - configuration failure - reinstallation required" ; exit 1 ; fi
16
# Any line containing "=" is executed as shell code via eval.
# NOTE(review): eval runs whatever the line contains, not just assignments;
# presumably `line` comes from the .ini file read by the enclosing loop -
# confirm that file is trusted.
if [[ "${line}" =~ "=" ]] ; then eval "$line" ; fi
17
done < "$HOME/domy/$BASE.ini"
22
Usage: ${0##*/} [OPTION...]
23
Run the first demo wizard
26
-c continue without pausing to read prompts
31
This command is useful to measure end-to-end performance.
37
# Show usage only when "-h" is present as a whole argument. The space padding
# stops arguments that merely contain "-h" from matching.
[[ " ${args[*]} " == *" -h "* ]] && usage 0
38
# Show usage only when "--help" is present as a whole argument. The space
# padding stops arguments that merely contain "--help" from matching.
[[ " ${args[*]} " == *" --help "* ]] && usage 0
44
echo "This is the first of two demos. This demo runs five (5) example"
45
echo "\"GRAPHS\". GRAPHS are parallel tool-chains that extract, clean,"
46
echo "align parallel text, and otherwise prepare corpora and translation"
47
echo "memory data for use as SMT training data."
49
echo "Example 1 converts HTML entities to UTF-8 characters"
50
echo "Example 2 creates sentence-aligned files from file-aligned data"
51
echo "Example 3 imports data from TMX files to CorpusFiltergraph"
52
echo "Example 4 exports data from CorpusFiltergraph to TMX files"
53
echo "Example 5 extracts bilingual data from individual files"
55
echo "You can find the GRAPHs and their config.ini files in:"
59
echo "Next: Instructions"
60
# Wait for one keypress unless the joined arguments contain "-c".
# -r keeps a typed backslash from being treated as an escape character.
[[ ! "${args[@]}" =~ "-c" ]] && read -r -n 1 -p "Press \"Enter\" to continue..."
65
echo "This is a wizard-style demo. It pauses before running an example"
66
echo "with a short description and the GRAPH's command line."
68
echo "It pauses after running the example to show the command line,"
69
echo "the input and output of the example. Right-click the input or"
70
echo "output path and select \"Open Link\" to see the changes."
73
echo "Next: Example 1, clean HTML/XML entities"
74
# Wait for one keypress unless the joined arguments contain "-c".
# -r keeps a typed backslash from being treated as an escape character.
[[ ! "${args[@]}" =~ "-c" ]] && read -r -n 1 -p "Press \"Enter\" to continue..."
77
echo "Example 1: clean HTML/XML entities"
79
echo "Cleaning HTML/XML data is an important task for all data preparation."
80
echo "HTML and XML documents often include \"entities\" that represent characters."
81
echo "For example, < is \"<\" and > is \">\" and á is \"á\". This GRAPH"
82
echo "converts these \"entities\" back to \"normal\" characters."
84
echo "Command: ~\$ $BASE clean-lm"
87
[[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" to start Example 1..."
90
echo "Example 1: clean HTML/XML entities"
92
if [ -f "$graphs/clean-lm/config.demo.ini" ]; then
93
# Back up the live clean-lm config, swap in the demo config, then run the graph.
# All paths are quoted so directories with spaces or glob characters survive.
# The python call stays last so its exit status is still in $? afterwards
# (presumably captured into `result` by a line not visible here).
cp "$graphs/clean-lm/config.ini" "$graphs/clean-lm/.config.ini.tmp"
cp "$graphs/clean-lm/config.demo.ini" "$graphs/clean-lm/config.ini"
python "$cfgm" -v clean-lm
97
# Put the user's original clean-lm config back, then stop the demo if the
# graph run failed. `result` is assigned outside the lines visible here.
mv "$graphs/clean-lm/.config.ini.tmp" "$graphs/clean-lm/config.ini"
[ "$result" -ne 0 ] && exit 1
101
echo "Command: ~\$ $BASE clean-lm"
102
echo "Input: \"file://$rootfolder/CORPORA/sa/Examples/Unicode/entities/lm/en/en/character-entities.txt\""
103
echo "Output: \"file://$rootfolder/CORPORA.demo/qc/Examples/Unicode/entities/lm/en/en/character-entities.txt\""
106
echo "\"clean-lm\" demo configuration is not installed."
109
echo "Next: Example 2, create sentence-aligned data from aligned files"
110
# Wait for one keypress unless the joined arguments contain "-c".
# -r keeps a typed backslash from being treated as an escape character.
[[ ! "${args[@]}" =~ "-c" ]] && read -r -n 1 -p "Press \"Enter\" to continue..."
113
echo "Example 2: create sentence-aligned data from aligned files"
115
echo "This GRAPH starts with file-aligned data (fa) with one sentence"
116
echo "or phrase per line. The GRAPH matches source and target language"
117
echo "sentences to the (tm) output. Sentences without a match are saved"
118
echo "to the language model (lm) output."
120
echo "Command: ~\$ $BASE sa-champollion"
122
[[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" to start Example 2..."
125
echo "Example 2: create sentence-aligned data from aligned files"
127
echo "Command: ~\$ $BASE sa-champollion"
129
if [ -f "$graphs/sa-champollion/config.demo.ini" ]; then
130
# Back up the live sa-champollion config, swap in the demo config, then run
# the graph. All paths are quoted so directories with spaces or glob
# characters survive. The python call stays last so its exit status is still
# in $? afterwards (presumably captured into `result` by a line not visible here).
cp "$graphs/sa-champollion/config.ini" "$graphs/sa-champollion/.config.ini.tmp"
cp "$graphs/sa-champollion/config.demo.ini" "$graphs/sa-champollion/config.ini"
python "$cfgm" -v sa-champollion
134
# Put the user's original sa-champollion config back, then stop the demo if
# the graph run failed. `result` is assigned outside the lines visible here.
mv "$graphs/sa-champollion/.config.ini.tmp" "$graphs/sa-champollion/config.ini"
[ "$result" -ne 0 ] && exit 1
138
echo "Command: ~\$ $BASE sa-champollion"
139
echo "Input1: \"file://$rootfolder/CORPORA/fa/Examples/News/general/tm/zh_cn/zh_cn/CPP20000210000021.txt\""
140
echo "Input2: \"file://$rootfolder/CORPORA/fa/Examples/News/general/tm/zh_cn/en/CPP20000210000021.txt\""
141
echo "Output1: \"file://$rootfolder/CORPORA.demo/sa/Examples/News/general/tm/zh_cn/zh_cn/CPP20000210000021.txt\""
142
echo "Output2: \"file://$rootfolder/CORPORA.demo/sa/Examples/News/general/tm/zh_cn/en/CPP20000210000021.txt\""
144
echo "\"sa-champollion\" demo configuration is not installed."
147
echo "Next: Example 3, import data from TMX files"
148
# Wait for one keypress unless the joined arguments contain "-c".
# -r keeps a typed backslash from being treated as an escape character.
[[ ! "${args[@]}" =~ "-c" ]] && read -r -n 1 -p "Press \"Enter\" to continue..."
151
echo "Example 3: import data from TMX files"
153
echo "TMX (translation memory exchange) files were developed to exchange"
154
echo "translation memory data between companies, colleagues and systems."
155
echo "This GRAPH imports aligned tmx data into the CorpusFiltergraph"
156
echo "folder hierarchy. These sample TMX files come from the DGT-TM"
157
echo "version 1 and maintain sentence alignment across 13 languages."
159
echo "Command: ~\$ $BASE import-tmx"
161
[[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" to start Example 3..."
164
echo "Example 3: import data from TMX files"
166
echo "Command: ~\$ $BASE import-tmx"
168
if [ -f "$graphs/import-tmx/config.demo.ini" ]; then
169
# Back up the live import-tmx config, swap in the demo config, then run the
# graph. All paths are quoted so directories with spaces or glob characters
# survive. The python call stays last so its exit status is still in $?
# afterwards (presumably captured into `result` by a line not visible here).
cp "$graphs/import-tmx/config.ini" "$graphs/import-tmx/.config.ini.tmp"
cp "$graphs/import-tmx/config.demo.ini" "$graphs/import-tmx/config.ini"
python "$cfgm" -v import-tmx
173
# Put the user's original import-tmx config back, then stop the demo if the
# graph run failed. `result` is assigned outside the lines visible here.
mv "$graphs/import-tmx/.config.ini.tmp" "$graphs/import-tmx/config.ini"
[ "$result" -ne 0 ] && exit 1
177
echo "Command: ~\$ $BASE import-tmx"
178
echo "Input1: \"file://$rootfolder/RAW/European%20Commission/DGT_TM_1.0/Volume_1/21985A0705(01).tmx\""
179
echo "Input2: \"file://$rootfolder/RAW/European%20Commission/DGT_TM_1.0/Volume_1/22001D0118(18).tmx\""
180
echo "Output1: \"file://$rootfolder/CORPORA.demo/sa/European%20Commission/DGT_TM_1.0/Volume_1/tm/en_gb/\""
183
echo "\"import-tmx\" demo configuration is not installed."
186
echo "Next: Example 4, export data to TMX files"
187
# Wait for one keypress unless the joined arguments contain "-c".
# -r keeps a typed backslash from being treated as an escape character.
[[ ! "${args[@]}" =~ "-c" ]] && read -r -n 1 -p "Press \"Enter\" to continue..."
190
echo "Example 4: export data to TMX files"
192
echo "This GRAPH exports tm data to TMX files. Data from the CorpusFiltergraph"
193
echo "folder hierarchy can be shared with other translation systems."
195
echo "Command: ~\$ $BASE export-tmx"
197
[[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" to start Example 4..."
200
echo "Example 4: export data to TMX files"
202
echo "Command: ~\$ $BASE export-tmx"
204
if [ -f "$graphs/export-tmx/config.demo.ini" ]; then
205
# Back up the live export-tmx config, swap in the demo config, then run the
# graph. All paths are quoted so directories with spaces or glob characters
# survive. The python call stays last so its exit status is still in $?
# afterwards (presumably captured into `result` by a line not visible here).
cp "$graphs/export-tmx/config.ini" "$graphs/export-tmx/.config.ini.tmp"
cp "$graphs/export-tmx/config.demo.ini" "$graphs/export-tmx/config.ini"
python "$cfgm" -v export-tmx
209
# Put the user's original export-tmx config back, then stop the demo if the
# graph run failed. `result` is assigned outside the lines visible here.
mv "$graphs/export-tmx/.config.ini.tmp" "$graphs/export-tmx/config.ini"
[ "$result" -ne 0 ] && exit 1
213
echo "Command: ~\$ $BASE export-tmx"
214
echo "Inputs: \"file://$rootfolder/CORPORA.demo/sa/European%20Commission/DGT_TM_1.0/Volume_1/tm/en_gb/\""
215
echo "Output1: \"file://$rootfolder/RAW.demo/European%20Commission/DGT_TM_1.0/Volume_1/tm/en_gb/21985A0705(01).tmx.txt.tmx\""
216
echo "Output2: \"file://$rootfolder/RAW.demo/European%20Commission/DGT_TM_1.0/Volume_1/tm/en_gb/22001D0118(18).tmx.txt.tmx\""
219
echo "\"export-tmx\" demo configuration is not installed."
222
echo "Next: Example 5, extract bilingual dictionaries from individual files"
223
# Wait for one keypress unless the joined arguments contain "-c".
# -r keeps a typed backslash from being treated as an escape character.
[[ ! "${args[@]}" =~ "-c" ]] && read -r -n 1 -p "Press \"Enter\" to continue..."
226
echo "Example 5: extract bilingual dictionaries from individual files"
228
echo "This is an example of a custom GRAPH. Users and PTTools can create"
229
echo "custom GRAPHS for special data-cleaning purposes. This example's"
230
echo "input file has parenthetical entries in-line with the main text."
231
echo "This GRAPH removes the parenthetical entries from the original files"
232
echo "and saves them as new parallel dictionary files."
234
echo "Command: ~\$ $BASE extract-dictionary"
236
[[ ! "${args[@]}" =~ "-c" ]] && read -n 1 -p "Press \"Enter\" to start Example 5..."
239
echo "Example 5: extract bilingual dictionaries from individual files"
241
echo "Command: ~\$ $BASE extract-dictionary"
243
if [ -f "$graphs/extract-dictionary/config.demo.ini" ]; then
244
# Back up the live extract-dictionary config, swap in the demo config, then
# run the graph. All paths are quoted so directories with spaces or glob
# characters survive. The python call stays last so its exit status is still
# in $? afterwards (presumably captured into `result` by a line not visible here).
cp "$graphs/extract-dictionary/config.ini" "$graphs/extract-dictionary/.config.ini.tmp"
cp "$graphs/extract-dictionary/config.demo.ini" "$graphs/extract-dictionary/config.ini"
python "$cfgm" -v extract-dictionary
248
# Put the user's original extract-dictionary config back, then stop the demo
# if the graph run failed. `result` is assigned outside the lines visible here.
mv "$graphs/extract-dictionary/.config.ini.tmp" "$graphs/extract-dictionary/config.ini"
[ "$result" -ne 0 ] && exit 1
252
echo "Command: ~\$ $BASE extract-dictionary"
253
echo "Input1: \"file://$rootfolder/CORPORA/sa/Examples/Legislation/statutes/tm/zh_hk/zh_hk/hklaws.txt\""
254
echo "Input2: \"file://$rootfolder/CORPORA/sa/Examples/Legislation/statutes/tm/zh_hk/en/hklaws.txt\""
255
echo "Outputs: \"file://$rootfolder/CORPORA.demo/qc/Examples/Legislation/statutes/\""
256
echo "Output1: \"file://$rootfolder/CORPORA.demo/qc/Examples/Legislation/statutes/tm/zh_hk/zh_hk/hklaws.txt\""
257
echo "Output2: \"file://$rootfolder/CORPORA.demo/qc/Examples/Legislation/statutes/tm/zh_hk/en/hklaws.txt\""
258
echo "Output3: \"file://$rootfolder/CORPORA.demo/qc/Examples/Legislation/statutes/tm-dictionary/zh_hk/zh_hk/hklaws.txt\""
259
echo "Output4: \"file://$rootfolder/CORPORA.demo/qc/Examples/Legislation/statutes/tm-dictionary/zh_hk/en/hklaws.txt\""
262
echo "\"extract-dictionary\" demo configuration is not installed."
266
# Wait for one keypress unless the joined arguments contain "-c".
# -r keeps a typed backslash from being treated as an escape character.
[[ ! "${args[@]}" =~ "-c" ]] && read -r -n 1 -p "Press \"Enter\" to continue..."
271
echo "Examples in Demo-1.sh extracted, cleaned and aligned parallel text data."
273
echo "Example 1 converted HTML entities to UTF-8 characters"
274
echo "Example 2 created sentence-aligned files from file-aligned data"
275
echo "Example 3 imported data from TMX files to CorpusFiltergraph"
276
echo "Example 4 exported data from CorpusFiltergraph to TMX files"
277
echo "Example 5 extracted bilingual data from individual files"
279
echo "NOTE: All outputs were saved to sub-folders in"
280
echo " \"$rootfolder/CORPORA.demo/*\" or \"$rootfolder/RAW.demo/*\"."
281
echo " The examples in the next demo, \"demo-2.sh,\" continue with the"
282
echo " outputs from this demo. It is safe to delete these demo root"
283
echo " folders after running \"demo-2.sh\"."
285
echo "Demo-2.sh prepares two data BUILD sets from the cleaned data and"
286
echo "trains two translation engines. You can start the next demo with"
287
echo "the command \"demo-2.sh\""
290
echo "Next: Repeat demo-1.sh or run demo-2.sh"
292
#CorpusFiltergraph™ v4.0.264
293
#Copyright © 2010-2012 Precision Translation Tools Co., Ltd.
295
#This program is free software: you can redistribute it and/or modify
296
#it under the terms of the GNU Lesser General Public License as published by
297
#the Free Software Foundation, either version 3 of the License, or
298
#(at your option) any later version.
300
#This program is distributed in the hope that it will be useful,
301
#but WITHOUT ANY WARRANTY; without even the implied warranty of
302
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
303
#GNU Lesser General Public License for more details.
305
#You should have received a copy of the GNU Lesser General Public License
306
#along with this program. If not, see http://www.gnu.org/licenses/.
308
#For more information, please contact Precision Translation Tools Co., Ltd.
309
#at: http://www.precisiontranslationtools.com