1
$| = 1; # disable Perl output buffering
23
if (defined $score{"$x,$y"}) {
24
return $score{"$x,$y"};
31
# Arguments: ($x, $y, $nx, $ny); $nx and $ny are the upper bounds of the
# i and j loops that follow.
my ($x, $y, $nx, $ny) = @_;
32
# One candidate score per alignment pattern tried at each cell.  $s13 (the
# 4-1 pattern) is declared here too, so that it is lexical like the others
# instead of silently becoming a package global.
my ($s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11, $s12, $s13);
33
my ($i, $j, $oi, $oj, $si, $sj, $smax);
34
# Cached predecessor indices ($i-1 .. $i-4, $j-1 .. $j-4) used to build the
# index strings handed to match_sentences.
my ($im1, $im2, $im3, $im4, $jm1, $jm2, $jm3, $jm4);
37
for($j = 0; $j <= $ny; $j++) {
38
my $center = int($j * $xyratio);
39
$window_start = $center-$window_size>0?$center-$window_size:0;
40
$window_end = $center+$window_size<$nx?$center+$window_size:$nx;
41
#print "$window_start $window_end\n";
42
for($i = $window_start; $i <= $window_end; $i++) {
43
#$s1=$s2=$s3=$s4=$s5=$s6=$s7=$s8=$s9=$s10=$s11=$s12=-100;
44
$im1 = $i-1; $im2 = $i-2; $im3 = $i-3; $im4 = $i-4;
45
$jm1 = $j-1; $jm2 = $j-2; $jm3 = $j-3; $jm4 = $j-4;
47
$s1 = $i>0 && $j>0 ? # 1-1
48
get_score($i-1, $j-1) + match_sentences("$im1 - $jm1")
51
get_score($i-1, $j) + match_sentences("$im1 -")
54
get_score($i, $j-1) + match_sentences("- $jm1")
56
$s4 = $i>1 && $j>0 ? # 2-1
57
get_score($i-2, $j-1) + match_sentences("$im2 $im1 - $jm1")
59
$s5 = $i>0 && $j>1 ? # 1-2
60
get_score($i-1, $j-2) + match_sentences("$im1 - $jm2 $jm1")
62
$s6 = $i>1 && $j>1 ? # 2-2
63
get_score($i-2, $j-2) + match_sentences("$im2 $im1 - $jm2 $jm1")
67
$s7 = $i>0 && $j>2 ? # 1-3
68
get_score($i-1, $j-3) + match_sentences("$im1 - $jm3 $jm2 $jm1")
70
$s8 = $i>2 && $j>0? # 3-1
71
get_score($i-3, $j-1) + match_sentences("$im3 $im2 $im1 - $jm1")
73
$s9 = $i>1 && $j>2 ? # 2-3
74
get_score($i-2, $j-3) + match_sentences("$im2 $im1 - $jm3 $jm2 $jm1")
76
$s10 = $i>2 && $j>1 ? # 3-2
77
get_score($i-3, $j-2) + match_sentences("$im3 $im2 $im1 - $jm2 $jm1")
79
# $s11 = $i>2 && $j>2 ? # 3-3
80
# get_score($i-3, $j-3) + match_sentences("$im3 $im2 $im1 - $jm3 $jm2 $jm1")
82
$s12 = $i>0 && $j>3 ? # 1-4
83
get_score($i-1, $j-4) + match_sentences("$im1 - $jm4 $jm3 $jm2 $jm1")
85
$s13 = $i>3 && $j>0? # 4-1
86
get_score($i-4, $j-1) + match_sentences("$im4 $im3 $im2 $im1 - $jm1")
92
if($s2>$smax) { $smax=$s2 };
93
if($s3>$smax) { $smax=$s3 };
94
if($s4>$smax) { $smax=$s4 };
95
if($s5>$smax) { $smax=$s5 };
96
if($s6>$smax) { $smax=$s6 };
97
if($s7>$smax) { $smax=$s7 };
98
if($s8>$smax) { $smax=$s8 };
99
if($s9>$smax) { $smax=$s9 };
100
if($s10>$smax) { $smax=$s10 };
101
#if($s11>$smax) { $smax=$s11 };
102
if($s12>$smax) { $smax=$s12 };
103
if($s13>$smax) { $smax=$s13 };
108
} elsif ($smax == $s1) { # 1-1
109
set_score($i,$j,$s1);
110
$path_x{"$i,$j"} = $i-1;
111
$path_y{"$i,$j"} = $j-1;
112
} elsif ($smax == $s2) { # 1-0
113
set_score($i,$j,$s2);
114
$path_x{"$i,$j"} = $i-1;
115
$path_y{"$i,$j"} = $j;
116
} elsif ($smax == $s3) { # 0-1
117
set_score($i,$j,$s3);
118
$path_x{"$i,$j"} = $i;
119
$path_y{"$i,$j"} = $j-1;
120
} elsif ($smax == $s4) { # 2-1
121
set_score($i,$j,$s4);
122
$path_x{"$i,$j"} = $i-2;
123
$path_y{"$i,$j"} = $j-1;
124
} elsif ($smax == $s5){ # 1-2
125
set_score($i,$j,$s5);
126
$path_x{"$i,$j"} = $i-1;
127
$path_y{"$i,$j"} = $j-2;
128
} elsif ($smax == $s6) { # 2-2
129
set_score($i,$j,$s6);
130
$path_x{"$i,$j"} = $i-2;
131
$path_y{"$i,$j"} = $j-2;
132
} elsif ($smax == $s7) { # 1-3
133
set_score($i,$j,$s7);
134
$path_x{"$i,$j"} = $i-1;
135
$path_y{"$i,$j"} = $j-3;
136
} elsif ($smax == $s8) { # 3-1
137
set_score($i,$j,$s8);
138
$path_x{"$i,$j"} = $i-3;
139
$path_y{"$i,$j"} = $j-1;
140
} elsif ($smax == $s9) { # 2-3
141
set_score($i,$j,$s9);
142
$path_x{"$i,$j"} = $i-2;
143
$path_y{"$i,$j"} = $j-3;
144
} elsif ($smax == $s10){ # 3-2
145
set_score($i,$j,$s10);
146
$path_x{"$i,$j"} = $i-3;
147
$path_y{"$i,$j"} = $j-2;
148
# } elsif ($smax == $s11) { # 3-3
149
# set_score($i,$j,$s11);
150
# $path_x{"$i,$j"} = $i-3;
151
# $path_y{"$i,$j"} = $j-3;
152
} elsif ($smax == $s12) { # 1-4
153
set_score($i,$j,$s12);
154
$path_x{"$i,$j"} = $i-1;
155
$path_y{"$i,$j"} = $j-4;
156
} elsif ($smax == $s13) { # 4-1
157
set_score($i,$j,$s13);
158
$path_x{"$i,$j"} = $i-4;
159
$path_y{"$i,$j"} = $j-1;
165
for($i=$nx, $j=$ny; $i>0 || $j>0; $i = $oi, $j = $oj, $n++) {
166
$oi = $path_x{"$i,$j"};
167
$oj = $path_y{"$i,$j"};
171
$im1 = $i-1; $im2 = $i-2; $im3 = $i-3;
172
$jm1 = $j-1; $jm2 = $j-2; $jm3 = $j-3;
174
if($si == 1 && $sj == 1) { # 1-1
175
$ralign[$n] = "$i <=> $j";
176
} elsif ($si == 1 && $sj == 0) { # 1-0
177
$ralign[$n] = "$i <=> omitted";
178
} elsif ($si == 0 && $sj == 1) { # 0-1
179
$ralign[$n] = "omitted <=> $j";
180
} elsif ($si == 2 && $sj == 1) { # 2-1
181
$ralign[$n] = "$im1,$i <=> $j";
182
} elsif ($si == 1 && $sj ==2 ) { # 1-2
183
$ralign[$n] = "$i <=> $jm1,$j";
184
} elsif ($si == 2 && $sj == 2) { # 2-2
185
$ralign[$n] = "$im1,$i <=> $jm1,$j";
186
} elsif ($si == 1 && $sj == 3) { # 1-3
187
$ralign[$n] = "$i <=> $jm2,$jm1,$j";
188
} elsif ($si == 3 && $sj == 1) { # 3-1
189
$ralign[$n] = "$im2,$im1,$i <=> $j";
190
} elsif ($si == 2 && $sj == 3) { # 2-3
191
$ralign[$n] = "$im1,$i <=> $jm2,$jm1,$j";
192
} elsif ($si == 3 && $sj == 2) { # 3-2
193
$ralign[$n] = "$im2,$im1,$i <=> $jm1,$j";
194
# } elsif ($si == 3 && $sj == 3) { # 3-3
195
# $ralign[$n] = "$im2,$im1,$i <=> $jm2,$jm1,$j";
196
} elsif ($si == 1 && $sj == 4) { # 1-4
197
$ralign[$n] = "$i <=> $jm3,$jm2,$jm1,$j";
198
} elsif ($si == 4 && $sj == 1) { # 4-1
199
$ralign[$n] = "$im3,$im2,$im1,$i <=> $j";
207
# match_sentences: score one candidate alignment pattern.
#
# The candidate is described by $map, a string of the form
# "<x indices> - <y indices>" (callers in this file pass e.g.
# "$im2 $im1 - $jm1", or "$im1 -" when the y side is empty).  $map is split
# on '-' into its x side and its y side.
# NOTE(review): the lines that read $map from @_ and that fill @x, @y, $nx,
# $ny, $xlen and $ylen are not visible in this excerpt -- confirm against
# the full file.
#
# Returns -0.1 when either side is empty; otherwise the raw score multiplied
# by the length penalty and, for every pattern except 1-1, by the matching
# $penaltyNM package variable.
sub match_sentences {
209
my ($score, $x, $y, @x, @y, $nx, $ny, $xlen, $ylen);
210
# Neutral multiplier; replaced further down only when the length test fires.
my $length_penalty = 1;
212
($x, $y) = split '-', $map;
213
#print STDERR "--- $map ---\n";
219
#print STDERR "FS: -0.01\n";
220
# NOTE(review): the disabled debug line above reports -0.01, but the value
# actually returned here is -0.1 -- confirm which one is intended.
return -0.1 if $nx == 0 || $ny == 0;
222
# faster implementation
# NOTE(review): dispatches on ($nx, $ny) to a scoreNM helper.  Whether this
# chain is live code or disabled cannot be told from this excerpt.
224
if ($nx == 1 && $ny == 1) {
225
$score = score11(@x, @y);
226
} elsif ($nx == 1 && $ny == 2) {
227
$score = score12(@x, @y);
228
} elsif ($nx == 2 && $ny == 1) {
229
# NOTE(review): the 2-1 case calls score12, which would receive two x
# indices followed by one y index.  A helper body taking ($x1, $x2, $y1)
# exists further down in this file -- this looks like a copy-paste slip;
# confirm the intended helper.
$score = score12(@x, @y);
230
} elsif ($nx == 2 && $ny == 2) {
231
# NOTE(review): the 2-2 case also calls score12 although four indices are
# passed.  A helper body taking ($x1, $x2, $y1, $y2) exists further down in
# this file -- confirm the intended helper.
$score = score12(@x, @y);
232
} elsif ($nx == 1 && $ny == 3) {
233
$score = score13(@x, @y);
234
} elsif ($nx == 3 && $ny == 1) {
235
$score = score31(@x, @y);
236
} elsif ($nx == 2 && $ny == 3) {
237
$score = score23(@x, @y);
238
} elsif ($nx == 3 && $ny == 2) {
239
$score = score32(@x, @y);
240
} elsif ($nx == 3 && $ny == 3) {
241
$score = score33(@x, @y);
242
} elsif ($nx == 1 && $ny == 4) {
243
$score = score14(@x, @y);
244
} elsif ($nx == 4 && $ny == 1) {
245
$score = score41(@x, @y);
248
# slower implementation
# Merges the selected sentences of each side into one string and scores the
# two strings lexically with match_sentences_lex.
250
$xsentences = merge_sentences(\@xst, @x);
251
$ysentences = merge_sentences(\@yst, @y);
252
$score = match_sentences_lex(\@x,\@y,$xsentences,$ysentences,\%xtoken_stat);
263
# Length penalty: applied only when the larger of $xlen and $ylen/$xtoyc
# exceeds 60.  The value is log10(6 + 4 * shorter/longer), with $xlen
# rescaled by $xtoyc, so it lies between log10(6) and 1.
# NOTE(review): $xtoyc is presumably an x-to-y length ratio -- confirm.
if (max($xlen,$ylen/$xtoyc) > 60) {
264
$length_penalty = log(6+4*min($xlen*$xtoyc,$ylen)/max($xlen*$xtoyc,$ylen))/log(10);
267
# Final result: raw score times the length penalty times the per-pattern
# penalty (the 1-1 pattern carries no extra penalty).
if ($nx == 1 && $ny == 1) {
268
return $score * $length_penalty;
269
} elsif ($nx == 1 && $ny == 2) {
270
return $score * $length_penalty * $penalty12;
271
} elsif ($nx == 2 && $ny == 1) {
272
return $score * $length_penalty * $penalty21;
273
} elsif ($nx == 2 && $ny == 2) {
274
return $score * $length_penalty * $penalty22;
275
} elsif ($nx == 1 && $ny == 3) {
276
return $score * $length_penalty * $penalty13;
277
} elsif ($nx == 3 && $ny == 1) {
278
return $score * $length_penalty * $penalty31;
279
} elsif ($nx == 2 && $ny == 3) {
280
return $score * $length_penalty * $penalty23;
281
} elsif ($nx == 3 && $ny == 2) {
282
return $score * $length_penalty * $penalty32;
283
} elsif ($nx == 3 && $ny == 3) {
284
return $score * $length_penalty * $penalty33;
285
} elsif ($nx == 1 && $ny == 4) {
286
return $score * $length_penalty * $penalty14;
287
} elsif ($nx == 4 && $ny == 1) {
288
return $score * $length_penalty * $penalty41;
294
# match_sentences_lex: lexical (bag-of-words) score for one candidate
# alignment.
#
# Arguments, in order:
#   $xsnts_index, $ysnts_index       - array refs; in the visible code they
#                                      are used only by a disabled debug print
#   $xsentences_ref, $ysentences_ref - scalar refs to the x-side and y-side
#                                      text, each split on whitespace
#   $xtoken_stat_href                - hash ref of per-token statistics for
#                                      the x side; the value under key
#                                      "TTAALL" is stored in $x_total_tokens
# Also reads the package variables %xstop and %dict.
# NOTE(review): the lines that fill %xtokens / %ytokens from the split
# results, and the return statement, are not visible in this excerpt.
sub match_sentences_lex {
295
my ($xsnts_index, $ysnts_index, $xsentences_ref, $ysentences_ref, $xtoken_stat_href) = @_;
296
my (%xtokens,%ytokens);
297
# Parenthesised so that BOTH variables are lexical: the unparenthesised form
# `my $min_pairs = 1, $score = 0;` declares only $min_pairs and assigns to
# the package variable $score.
my ($min_pairs, $score) = (1, 0);
299
@_ = split ' ', $$xsentences_ref;
304
@_ = split ' ', $$ysentences_ref;
310
#print STDERR "score bag words\n";
312
$x_total_tokens = $$xtoken_stat_href{"TTAALL"};
314
# print STDERR "\n\n", join ' ',@$xsnts_index,"-", join ' ',@$ysnts_index,"\n";
315
foreach $xtoken (keys %xtokens) {
316
# Identical token on both sides and not listed in %xstop: add
# log(total / stat(token) * min(count_x, count_y) + 1).
# NOTE(review): presumably an IDF-style weight; the meaning of the
# per-token statistic is not visible here.
if (defined $ytokens{$xtoken} && ! defined $xstop{$xtoken}) {
317
$score += log(($x_total_tokens/$$xtoken_stat_href{$xtoken}) * min($xtokens{$xtoken},$ytokens{$xtoken})+1);
319
# Try every entry that %dict lists for this x token.
foreach $xtoken_trans (@{$dict{$xtoken}}) {
320
if (defined $ytokens{$xtoken_trans}) {
321
# Pair as many occurrences as both sides still have.
$min_pairs = min($xtokens{$xtoken},$ytokens{$xtoken_trans});
322
next if $min_pairs == 0;
323
#print STDERR "$xtoken $xtoken_trans $ytokens{$xtoken_trans}\n";
324
$score += log(($x_total_tokens/$$xtoken_stat_href{$xtoken}) * $min_pairs + 1);
325
# Consume the paired occurrences so they cannot be matched again.
$xtokens{$xtoken} -= $min_pairs;
326
$ytokens{$xtoken_trans} -= $min_pairs;
333
#print STDERR "Score: $score\n";
337
# merge_sentences: build one string from the sentences selected by index.
#
#   $st_aref - reference to an array of sentences
#   @st      - one to four indices into that array
# The selected sentences are joined with single spaces into $sentences.
# NOTE(review): the declaration of $sentences, the handling of any other
# index count and the return statement are not visible in this excerpt.
# match_sentences_lex dereferences the result as a scalar ref, so presumably
# a reference to $sentences is returned -- confirm against the full file.
sub merge_sentences {
338
my ($st_aref,@st) = @_;
342
# single sentence: nothing to join
if (scalar @st == 1) {
343
$sentences = $$st_aref[$st[0]];
345
# merge two sentences
346
} elsif (scalar @st == 2) {
347
$sentences = "$$st_aref[$st[0]] $$st_aref[$st[1]]";
349
# merge three sentences
350
} elsif (scalar @st == 3) {
351
# Terminated with ';' like the sibling branches, so that appending a
# statement to this branch later cannot fuse with this assignment.
$sentences = "$$st_aref[$st[0]] $$st_aref[$st[1]] $$st_aref[$st[2]]";
353
# merge four sentences
354
} elsif (scalar @st == 4) {
355
$sentences = "$$st_aref[$st[0]] $$st_aref[$st[1]] $$st_aref[$st[2]] $$st_aref[$st[3]]";
363
local ($x1, $y1) = @_;
365
return $st_scores{"$x1,$y1"};
369
local ($x1, $y1, $y2) = @_;
371
return $st_scores{"$x1,$y1"}+$st_scores{"$x1,$y2"};
376
# 2-1 pattern: two x indices against one y index.
local ($x1, $x2, $y1) = @_;
378
# Sum of the two pair scores.  The %st_scores key is "x,y" with no space, as
# in every other lookup in this file; a key containing ", " never matches.
return $st_scores{"$x1,$y1"}+$st_scores{"$x2,$y1"};
382
# 2-2 pattern: sum of the pair scores of all four (x, y) combinations --
# (x1,y1), (x1,y2), (x2,y1) and (x2,y2).  The last term must use $y2;
# repeating "$x2,$y1" counts that pair twice and drops (x2,y2).
local ($x1, $x2, $y1, $y2) = @_;
383
return $st_scores{"$x1,$y1"}+$st_scores{"$x1,$y2"}
384
+ $st_scores{"$x2,$y1"}+$st_scores{"$x2,$y2"};
388
local ($x1, $x2, $x3, $y1) = @_;
389
return $st_scores{"$x1,$y1"}+$st_scores{"$x2,$y1"}
390
+ $st_scores{"$x3,$y1"};
394
local ($x1, $y1, $y2, $y3) = @_;
395
return $st_scores{"$x1,$y1"}+$st_scores{"$x1,$y2"}
396
+ $st_scores{"$x1,$y3"};
400
local ($x1, $x2, $x3, $x4, $y1) = @_;
401
return $st_scores{"$x1,$y1"}+$st_scores{"$x2,$y1"}
402
+ $st_scores{"$x3,$y1"} + $st_scores{"$x4,$y1"};
406
local ($x1, $y1, $y2, $y3, $y4) = @_;
407
return $st_scores{"$x1,$y1"}+$st_scores{"$x1,$y2"}
408
+ $st_scores{"$x1,$y3"}+$st_scores{"$x1,$y4"};