3
# Author: Xiaoyi Ma at the LDC, 06/06/2003
4
# Purpose: given sentences of both sides and an alignment file
5
# merge_alignment.pl merges the two sides together and
6
# prints an easy-to-read output
7
# Usage: merge_alignment.pl [-hod] X_sentence_file Y_sentence_file alignment_result
8
# X_sentence_file: file containing all X sentences, indicated by
10
# the seg ids should be sequential numbers, starting from one.
11
# Y_sentence_file: file containing all Y sentences, indicated by
13
# the seg ids should be sequential numbers, starting from one.
14
# alignment_result: alignment file, one alignment per line
15
$| = 1; # enable autoflush on the currently selected output handle (STDOUT unless select() changed it), so output is written immediately instead of being buffered
19
# Parse the single-letter switches -h, -o and -d into %opts; if the parser
# reports a failure, hand off to usage().
# NOTE(review): getopts is presumably Getopt::Std's; its import is not visible in this excerpt.
getopts('hod', \%opts) or usage();
22
# -o asks for deletions and insertions to be printed as well (off by default).
$printomission = $opts{'o'};
24
# Exactly three positional arguments are required; anything else goes to usage().
usage() unless @ARGV == 3;
26
# Positional arguments, in order: X sentence file, Y sentence file, alignment file.
($efn, $cfn, $align) = @ARGV[0 .. 2];
29
# Open the X-side sentence file for reading.
# The three-argument form treats $efn purely as a file name, so characters
# such as '>' or '|' in it can no longer be read as an open mode, and $! says
# why the open failed. The bareword handle E is kept because code outside
# this line presumably reads from it.
# Note: unlike the two-argument form, a name of "-" now means a file called
# "-", not STDIN.
open(E, '<', $efn) or die "$0: can not open $efn: $!\n";
30
# Open the Y-side sentence file for reading.
# The three-argument form treats $cfn purely as a file name, so characters
# such as '>' or '|' in it can no longer be read as an open mode, and $! says
# why the open failed. The bareword handle C is kept because code outside
# this line presumably reads from it.
# Note: unlike the two-argument form, a name of "-" now means a file called
# "-", not STDIN.
open(C, '<', $cfn) or die "$0: can not open $cfn: $!\n";
31
# Open the alignment file for reading.
# The three-argument form treats $align purely as a file name, so characters
# such as '>' or '|' in it can no longer be read as an open mode, and $! says
# why the open failed. The bareword handle A is kept because code outside
# this line presumably reads from it.
# Note: unlike the two-argument form, a name of "-" now means a file called
# "-", not STDIN.
open(A, '<', $align) or die "$0: can not open $align: $!\n";
33
# Derive the document id from the last path component of the Y-side file name.
# File::Basename does this in-process, so a file name containing spaces or
# shell metacharacters is no longer passed through a shell command line.
# The backtick form returned the name with its trailing newline; that newline
# is kept so any later handling of $docid (not visible in this excerpt) sees
# the same value as before.
require File::Basename;
$docid = File::Basename::basename($cfn) . "\n";
39
if (/<seg id=(\d+)>(.*)<\/seg>/) {
48
if (/<seg id=(\d+)>(.*)<\/seg>/) {
56
print "<DOC docid=$docid>\n";
61
unless ($printomission) {
66
$esent = $1; $csent = $2;
72
if ($esent =~ /omitted/) {
76
@esent = split /,/, $esent;
80
if ($csent =~ /omitted/) {
84
@csent = split /,/, $csent;
88
print "<SENT type=$etype-$ctype>\n";
106
print STDERR << "EOF";
107
usage: $0 [-hod] <X file> <Y file> <alignment file>
109
-h : this (help) message
110
-o : print deletion and insertion (default is no).
111
-d : debug mode, prints alignment as well. (default is no)