# Try to detect markup errors in translations.

# Author: Peter Moulder <pmoulder@mail.csse.monash.edu.au>
# Copyright (C) 2004 Monash University
# License: GNU GPL v2 or (at your option) any later version.

# Initial egrep version:
#egrep '<b>[^<>]*(>|<([^/]|/([^b"]|b[^>])))' "$mydir"/*.po
# Somewhat simplified by use of negative lookahead in perl.
# (The egrep version as written can't detect problems that span a line,
# e.g. unterminated `<b>'. One way of doing the s/"\n"//g thing would be with
# tr and sed, but that requires a sed that allows arbitrary line lengths, which
# many non-GNU seds don't.)
19
# Matches one whole `#' comment line of a .po entry, including its newline.
# Used below as ${com}* to skip any run of comment lines.
my $com = qr/(?:\#[^\n]*\n)/;
20
# Matches one double-quoted string: between the quotes, any mix of characters
# other than `"' and `\', and backslash escapes (a backslash followed by one
# non-newline character).
my $str = qr/(?:"(?:[^"\\]|\\.)*")/;
21
# Optional attribute text following a tag name: one or more spaces, then
# anything up to (but not including) the next `<' or `>'.
my $attrsRE = qr/(?: +[^<>]*)?/;
22
# Matches one recognised <span> attribute as it appears inside a .po string:
# leading space(s), an attribute name from the list below, then =\"value\"
# with backslash-escaped quotes.  The value may not contain `\' or `"'.
my $span_attr = qr/(?:\ +(?:font_(?:desc|family)|face|size|style|weight|variant|stretch|(?:fore|back)ground|underline|rise|strikethrough|fallback|lang)\=\\\"[^\\\"]*\\\")/;
30
print "$name: $msg:\n$_";
34
# Returns true iff successful.
38
$str =~ s/\A"// or die "Bug: No leading `\"' in `$str'";
39
$str =~ s/"\Z// or die "Bug: No trailing `\"' in `$str'";
41
if ($str =~ /\AProject-Id-Version:.*POT-Creation-Date/
42
or $str =~ /\A<[^<>]*>\Z/) {
49
# Remove valid sequences.
50
while ($str =~ s{<([bisu]|big|su[bp]|small|tt|span)(${attrsRE})>[^<>]*</\1>}{}) {
52
my ($tag, $attrs) = ($1, $2);
54
$attrs =~ s/${span_attr}*//g;
58
po_error("Unexpected <span> attributes `$attrs'");
62
if ($attrs !~ /\A *\Z/) {
63
po_error("<$tag> can't have attributes in Pango");
69
# A numeric entity must be wholly decimal (&#123;) or wholly hexadecimal
# (&#x40;).  The decimal arm skips `&#x' so that an all-letter hex entity such
# as `&#xAB;' is judged only by the hex arm instead of being rejected as
# "non-digits before the `;'".
if (($str =~ m{&#(?!x)[^0-9]+;}) or ($str =~ m{&#x[^0-9a-fA-F]+;})) {
    po_error("Entity declaration error (must look like '&#123;' or '&#x40;' and be in ASCII)");
74
# Catch entities that go wrong straight after the prefix, even when the
# closing `;' is missing.  The decimal arm skips `&#x'; without that, the `x'
# of every hexadecimal entity counts as a non-digit and valid input such as
# `&#x40;' is rejected.
if (($str =~ m{&#(?!x)[^0-9]+}) or ($str =~ m{&#x[^0-9a-fA-F]+})) {
    po_error("Entity declaration error 2 (must look like '&#123;' or '&#x40;' and be in ASCII)");
79
# Whatever follows the prefix must be 2-4 digits and a terminating `;'.
# The decimal arm skips `&#x' so that a well-formed hexadecimal entity is
# measured against the hex-digit rule only, not the decimal one (which it can
# never satisfy).
if (($str =~ m{&#(?!x)(?![0-9]{2,4};)}) or ($str =~ m{&#x(?![0-9a-fA-F]{2,4};)})) {
    po_error("Entity declaration error 3 (must look like '&#123;' or '&#x40;' and be in ASCII)");
84
# Check for attributes etc. in non-<span> element.
85
# Flag a non-<span> tag whose name is followed by anything other than
# optional spaces and `>'.  The lookahead must include the `>': a bare
# `(?! *)' can never succeed, because ` *' always matches the empty string,
# which made this check unreachable.
if ($str =~ m{<([bisu]|big|su[bp]|small|tt)\b(?! *>)[^<>]*>}) {
    po_error("Unexpected characters in <$1> tag");
90
if ($str =~ m{<([bisu]|big|su[bp]|small|span|tt)${attrsRE}>}) {
91
po_error("unclosed <$1>");
95
if ($str =~ m{</\ *([bisu]|big|su[bp]|small|span|tt)\ *>}) {
96
po_error("Unmatched closing </$1>");
101
$str =~ s/<(?:defs|image|rect|svg)>//g;
105
$str =~ s/\([<>][01]\)//g;
109
$str =~ s/\[[<>]\]//g;
110
$str =~ s/\\"[<>]\\"//g;
111
$str =~ s/\xe2\x80\x9e[<>]\xe2\x80\x9c//g;
112
# Strip every guillemet-quoted bracket, not just the first one, in line with
# the neighbouring substitutions (the bytes are UTF-8 for U+00AB and U+00BB).
$str =~ s/\xc2\xab[<>]\xc2\xbb//g;
115
$str =~ s/\A[^<>]*//;
116
$str =~ s/[^<>]*\Z//;
118
if ($str =~ /\A([<>])\Z/) {
120
po_error("Unescaped `$1'");
128
po_error("parsing error for `$str'");
137
die "check_strs: expecting >= 2 strings";
139
if ((($_[0] eq '""') && ($_[1] =~ /Project-Id-Version:.*POT-Creation-Date:/s))
140
or ($_[0] eq '"> and < scale by:"')) {
141
# Not a Pango string.
144
foreach my $str (@_) {
145
$str eq '""' or check_str($str) or return 0;
152
# Reference for the markup language:
153
# http://developer.gnome.org/doc/API/2.0/pango/PangoMarkupFormat.html
154
# (though not all translation strings will be pango markup strings).
156
if (m{\A${com}*\Z}) {
162
if (!m{\A${com}*msgid[^\n]*\n${com}*msgstr[^\n]*\n${com}*\Z} &&
163
!m{\A${com}*msgid[^\n]*\n${com}*msgid_plural[^\n]*\n${com}*(msgstr\[[^\n]*\n${com}*)+\Z}) {
164
po_error('Not in msg format');
167
if (!m{\A${com}*msgid ${str}\s*\n${com}*msgstr ${str}\s*\n${com}*\Z} &&
168
!m{\A${com}*msgid ${str}\s*\n${com}*msgid_plural ${str}\s*\n${com}*(msgstr\[\d+\] ${str}\s*\n${com}*)+\Z}) {
169
po_error('Mismatched quotes');
173
# The flag line may be the very first line of the entry, in which case no
# newline precedes it; accept start-of-entry as well.
if (m{(?:\A|\n)\#,\ fuzzy}) {
174
# Fuzzy entries aren't used, so ignore them.
175
# (This prevents warnings about mismatching <>/ pattern.)
179
if (m{\A${com}*msgid\ (${str})\n
180
${com}*msgstr\ (${str})\n
182
check_strs($1, $2) or next ENTRY;
184
elsif (m{\A${com}*msgid\ (${str})\n
185
${com}*msgid_plural\ (${str})\n
186
((?:${com}*msgstr\[\d+\]\ ${str}\n${com}*)+)\Z}x) {
187
my ($s1, $s2, $rest) = ($1, $2, $3);
188
my @strs = ($s1, $s2);
189
while ($rest =~ s/\A${com}*msgstr\[\d+\]\ (${str})\n${com}*//) {
192
$rest eq '' or die "BUG: unparsed plural entries `$rest'";
193
check_strs(@strs) or next ENTRY;
195
# Braces make `[ \t]' unambiguously a character class following the $str
# pattern, rather than something Perl could read as a subscript on @str.
elsif (m{${str}[ \t]}) {
196
po_error('Trailing whitespace');
199
po_error("parse error; may be a bug in po/check-markup");
203
# Some makefiles (currently the top-level Makefile.am) expect this script to
204
# exit 1 if any problems found.