3
# Usage: utf8-num2arabic <utf8 text file>
4
# Author: Xiaoyi Ma, LDC, 03/09/2006
5
# Purpose: translates Chinese numbers in utf8 into their Arabic
6
# equivalents, where possible
8
$| = 1; # autoflush: flush the currently selected output handle after every write
13
# Read from the file named by the first command-line argument by re-opening
# STDIN on it. The three-argument form treats the argument purely as a file
# name, so characters such as '>' or '|' in it cannot change the open mode.
# The system error ($!) is included so the failure reason is visible.
defined $ARGV[0] or die "Usage: $0 <utf8 text file>\n";
open(STDIN, '<', $ARGV[0]) or die "$0: cannot open $ARGV[0]: $!\n";
16
# Decode input as UTF-8. ":encoding(UTF-8)" validates the byte stream, whereas
# the bare ":utf8" layer marks the bytes as characters without checking them.
binmode(STDIN, ':encoding(UTF-8)') or die "$0: cannot set UTF-8 layer on STDIN: $!\n";
17
# Emit characters written to STDOUT as UTF-8 bytes.
binmode(STDOUT, ':utf8');
19
# Pattern source for an integer: a run of two or more Chinese numeral
# characters (the digits ○/零 through 九 and the units 十 百 千 万 亿).
# A single numeral character on its own is not matched by this pattern.
$int = "(○|零|一|二|三|四|五|六|七|八|九|十|百|千|万|亿){2,}";
20
# Pattern source for a decimal: one or more numeral characters, the
# character 点 ("point"), then one or more numeral characters.
$dec = "(○|零|一|二|三|四|五|六|七|八|九|十|百|千|万|亿)+点(○|零|一|二|三|四|五|六|七|八|九|十|百|千|万|亿)+";
24
# Replace every Chinese number in $_ with the value returned by cn2arabic.
# The decimal pattern must be tried first: Perl takes the first alternative
# that matches at a position, so with $int first any decimal whose integer
# part has two or more numeral characters was consumed as a plain integer
# and its "点..." fraction was left unconverted.
s/$dec|$int/&cn2arabic($&)/eg;
43
# Swap each magnitude character for a one-letter ASCII placeholder
# (b, q, w, y); mid2arabic then splits the string on these letters.
$cn =~ s/百/b/g; # hundred
44
$cn =~ s/千/q/g; # thousand
45
$cn =~ s/万/w/g; # ten thousand
46
$cn =~ s/亿/y/g; # 100 million
49
# Hand the placeholder-encoded string to mid2arabic and return its result.
return &mid2arabic($cn);
54
# Working values for the recursive conversion. $int/$fra and $multi/$remain
# each hold the converted text before/after a match. This lexical $int
# shadows the file-level $int pattern string inside this scope.
my ($int,$fra,$multi,$remain);
59
# Convert the text before ($`) and after ($') the most recent successful
# match separately, by recursion.
$int = &mid2arabic($`);
60
$fra = &mid2arabic($');
62
# Magnitude placeholders are tested from largest to smallest (y, w, q, b, s).
# At the first one found, the text before it is converted recursively as the
# multiplier and the text after it as the remainder; the result is
# multiplier * magnitude + remainder.
} elsif ($cn =~ /y/) {
63
# y stands for 亿: 100,000,000
$multi = &mid2arabic($`);
64
$remain = &mid2arabic($');
65
return $multi*100000000+$remain;
66
} elsif ($cn =~ /w/) {
67
# w stands for 万: 10,000
$multi = &mid2arabic($`);
68
$remain = &mid2arabic($');
69
return $multi*10000+$remain;
70
} elsif ($cn =~ /q/) {
71
# q stands for 千: 1,000
$multi = &mid2arabic($`);
72
$remain = &mid2arabic($');
73
return $multi*1000+$remain;
74
} elsif ($cn =~ /b/) {
75
# b stands for 百: 100
$multi = &mid2arabic($`);
76
$remain = &mid2arabic($');
77
return $multi*100+$remain;
78
} elsif ($cn =~ /s/) {
82
# s scales by 10 (presumably the placeholder for 十 -- confirm in cn2arabic)
$multi = &mid2arabic($`);
84
$remain = &mid2arabic($');
85
return $multi*10+$remain;