2
# Setting $| to a true value turns on autoflush for the currently selected
# output handle (STDOUT unless select() was called earlier, which is not
# visible here), so every print is written out immediately.
$| = 1; # disable Perl output buffering
4
# Decode STDIN as UTF-8 through the validating :encoding(UTF-8) layer. The bare
# :utf8 layer trusts the incoming bytes without checking them, so malformed
# input would be accepted silently and passed on to the later processing.
binmode(STDIN, ":encoding(UTF-8)");
5
# Encode STDOUT as UTF-8 through the strict :encoding(UTF-8) layer rather than
# the unchecked :utf8 flag, so code points that cannot be encoded as valid
# UTF-8 are reported instead of being written out as-is.
binmode(STDOUT, ":encoding(UTF-8)");
12
# Handle only input that contains a <seg id=N>...</seg> element. The pattern is
# matched against $_; capture 1 is the numeric id and capture 2 is everything
# between the tags (greedy, so it runs up to the last </seg> on the line).
# NOTE(review): the closing brace of this block and the code that fills $_ are
# not visible in this excerpt.
if (/<seg id=(\d+)>(.*)<\/seg>/) {
13
# Copy the captures right away; the substitutions below overwrite $1 and $2.
$segid = $1; $seg = $2;
21
# Put a space after any period that is followed by a non-digit character.
# \D also matches whitespace, so a period that is already followed by a space
# ends up followed by two spaces.
22
$seg =~ s/\.(\D)/\. $1/g;
23
# Put a space before any period that is followed by a space.
24
# The following space is the one introduced by the previous substitution.
# NOTE(review): no statement implementing this step is visible here.
27
# Put spaces around colons and commas, unless they are surrounded by digits.
# NOTE(review): the statement that inserts those spaces is not visible here;
# presumably it sits between the protect and restore steps below - confirm.
#
# Protect step: a '.', ':' or ',' that sits directly between two digits is
# swapped for a placeholder token. Each match consumes both neighbouring
# digits, so a separator that shares a digit with the previous match (the
# second '.' in "1.2.3") is left unprotected.
28
$seg =~ s/(\d)\.(\d)/$1DOTTKN$2/g;
29
$seg =~ s/(\d)\:(\d)/$1COLONTKN$2/g;
30
$seg =~ s/(\d)\,(\d)/$1COMATKN$2/g;
34
# Restore step: turn the period and colon placeholders back into the original
# characters.
$seg =~ s/(\d)DOTTKN(\d)/$1\.$2/g;
35
$seg =~ s/(\d)COLONTKN(\d)/$1\:$2/g;
36
# Disabled alternative that would restore the comma between the digits:
#$seg =~ s/(\d)COMATKN(\d)/$1\,$2/g;
37
# Active behaviour: the comma placeholder is dropped, so the two digits are
# joined and a protected "1,000" comes out as "1000".
$seg =~ s/(\d)COMATKN(\d)/$1$2/g;
38
# Insert a space between an ASCII letter and a digit that follows it
# ("a1" becomes "a 1").
$seg =~ s/([a-zA-Z])(\d)/$1 $2/g;
39
# Insert a space between a digit and an ASCII letter that follows it
# ("1a" becomes "1 a").
$seg =~ s/(\d)([a-zA-Z])/$1 $2/g;
42
# Emit the rewritten text inside a fresh <seg> element with the original id.
# Only the id and the segment text are printed; anything on the input outside
# the two captures is not reproduced.
print "<seg id=$segid>$seg</seg>\n";