3
# Load the translation dictionary
4
# Each line of the dictionary files should look like:
7
# Any entry which has a stop word as the source word
11
# $dict_fn: file name of the translation dictionary
12
# $xstop_href: reference to a hash table of the source language stop words
13
# $dict_href: reference to the hash table of loaded dictionary;
14
# the hash table is a hash of arrays with the keys being
15
# the source word, the value being an array of possible
17
$| = 1; # disable Perl output buffering
19
my ($dict_fn, $xstop_href, $dict_href) = @_;
21
print STDERR "Reading seed translation lexicon...";
22
# Parenthesize open()'s arguments: in the bare list form, `||` bound to the
# "<filename" string (always true), so the die could never fire on failure.
open(D, "<$dict_fn") || die "$0: Couldn't open $dict_fn!\n";
25
if (/^\s*(.+?)\s*<>\s*(.+?)\s*$/) {
29
# lowercase the source words; should be commented out
30
# for multiple-byte encodings (such as Big5 Chinese)
31
# which overlaps with ASCII
32
$source =~ tr/[A-Z]/[a-z]/;
34
# discard entries with stop words
35
next if defined $$xstop_href{$source};
37
#$translation =~ s/\W/\\$&/g;
38
push @{$dict{"$source"}}, $translation;
40
print STDERR "invalid dictionary entry:\n$_\n";
44
print STDERR " done.\n";
45
print STDERR "Number of entries: ", scalar keys %dict, "\n";