~ubuntu-branches/ubuntu/karmic/iso-codes/karmic

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env perl
#
# Takes a list of files on the command line and checks for valid
# UTF-8 data. Used for checking .po files.
#
# Copyright © 2009 Tobias Quathamer <toddy@debian.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

use strict;
use warnings;

# Overall exit status of the script: remains 0 only if every file given
# on the command line could be read and passed both checks below.
my $exit_status = 0;

# Check each file named on the command line for
#   1. lines consisting solely of well-formed UTF-8 byte sequences, and
#   2. the presence of a "Content-Type: text/plain; charset=UTF-8" header.
# Problems are reported on standard output and make the script exit with 1.
foreach my $filename (@ARGV) {
  my $content_type_checked = 0;

  # Three-argument open with a lexical handle: the file name is never
  # parsed for mode characters (a two-argument open would treat e.g. a
  # trailing "|" as a command to run). The ":raw" layer guarantees that
  # the byte-oriented pattern below sees undecoded bytes regardless of
  # any default I/O layers configured in the environment.
  my $fh;
  unless (open $fh, '<:raw', $filename) {
    # Report unreadable files explicitly instead of falling through to
    # a misleading "Content-Type" error.
    printf("Error in file %s:\n", $filename);
    printf("Could not open file for reading: %s\n", $!);
    $exit_status = 1;
    next;
  }

  while (<$fh>) {
    # Check for valid UTF-8 encoding: the whole line (\A ... \z) must be
    # a sequence of the well-formed byte patterns listed below.
    unless (m/\A(
      [\x09\x0A\x0D\x20-\x7E]            # ASCII
    | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
    |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
    | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
    |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
    |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
    | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
    |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
    )*\z/x) {
      # Found invalid characters for UTF-8
      printf("Error in file %s at line number %d:\n", $filename, $.);
      # Show the line with the error
      print;
      $exit_status = 1;
      # Skip the rest of the current file. Note that if the header has
      # not been seen yet, the Content-Type error below is reported too.
      last;
    }
    # Check that the Content-Type header field is set correctly.
    if (!$content_type_checked && /Content-Type: text\/plain; charset=UTF-8/) {
      $content_type_checked = 1;
    }
  }
  unless ($content_type_checked) {
    printf("Error in file %s:\n", $filename);
    print("Could not detect correct Content-Type header field.\n");
    $exit_status = 1;
  }
  # Closing the handle also resets the line counter ($.) for the next file.
  close $fh;
}

exit($exit_status);