3
# Create a character mapping for GB2312 encoding.
6
# Requires the map file GB2312.TXT (which actually holds GB2312-80) in the
7
# current directory, and produces the map file CSGB2312.TXT.
9
# Copyright (C) 2000 Martin Schwartz. All rights reserved.
10
# This program is free software; you can redistribute it and/or
11
# modify it under the same terms as Perl itself.
13
# Contact: Martin Schwartz <martin@nacho.de>
18
# GB2312 to Unicode table; a mixed one byte, two byte mapping.
20
# NOTE: This file is generated automatically from GB2312.TXT by mkCSGB2312
21
# It is constructed from the mappings of:
23
# - ISO8859-1 characters 0x0000 .. 0x00FF
25
# - GB2312-80 characters in EUC form.
27
# Actually GB2312 should not incorporate the whole ISO8859-1 set, but only the
28
# Unicode characters 0x0020 to 0x007f. World's usage is different...
29
# As an effect of this a round trip conversion GB2312 -> UTF16 -> GB2312 will
30
# produce differences if the original GB2312 encoding contains one or more
31
# of these ISO-8859-1 one byte characters:
33
# 0xA4, 0xA7, 0xA8, 0xB0, 0xB1, 0xD7, 0xE0, 0xE1, 0xE8, 0xE9,
34
# 0xEA, 0xEC, 0xED, 0xF2, 0xF3, 0xF7, 0xF9, 0xFA, 0xFC
36
# Anyway these differences shouldn't cause rendering problems, since the
37
# translation back to GB2312 for these characters will utilize an original
38
# character of set GB2312-80.
40
# martin [2000-Jun-19]
47
# Announce on STDOUT that generation is starting.
print "Creating GB2312 encoding, based on GB2312-80 encoding.\n";
52
# Announce completion and name the file that was written.
print "Done. Saved as CSGB2312.TXT\n";
56
# Open the source table GB2312.TXT for reading on the bareword handle GB2312.
open ( GB2312, "GB2312.TXT" )
57
# Abort, reporting the OS error text ($!), if the input cannot be opened.
or die "Can't open input file GB2312.TXT! ($!)"
59
# Open CSGB2312.TXT for writing on the bareword handle CSGB2312; the leading
# ">" in the 2-arg form creates the file or truncates an existing one.
open ( CSGB2312, ">CSGB2312.TXT" )
60
# Abort, reporting the OS error text ($!), if the output cannot be opened.
or die "Can't open output file CSGB2312.TXT! ($!)"
69
# Write the heading comment for the one-byte section of the output table.
print CSGB2312 "\n# ISO-8859-1 characters (0x0000-0x00ff):\n\n";
70
# Visit every byte value; $_ is the current value.
for ( 0x00 .. 0xff ) {
71
# Identity mapping: the value as 2 hex digits, a tab, then the same value
# as 4 hex digits.
printf CSGB2312 "0x%02x\t0x%04x\n", $_, $_;
74
# print CSGB2312 "\n\n# Unambiguous ISO-8859-1 characters:\n\n";
76
# 0x80..0xa3, 0xa5..0xa6, 0xa9..0xaf, 0xb2..0xd6,
77
# 0xd8..0xdf, 0xe2..0xe7, 0xeb, 0xee..0xf1, 0xf4..0xf6,
78
# 0xf8, 0xfb, 0xfd, 0xfe, 0xff
80
# printf CSGB2312 "0x%02x\t0x%04x\n", $_, $_;
83
# Write the heading comment for the two-byte section of the output table.
print CSGB2312 "\n\n# GB2312-80 characters:\n\n";
86
# Capture two fields from $_, each "0x" followed by any four characters,
# separated by whitespace. On no match both variables are left undefined.
# NOTE(review): presumably $_ is one line of GB2312.TXT -- confirm against
# the enclosing loop.
my ($gb, $uni) = /(0x....)\s+(0x....)/;
88
# Parse the first field as hex and set the top bit of each of its two bytes
# (| 0x8080); the variable name suggests this is the EUC form of the code.
my $euc = hex ($gb) | 0x8080;
89
# Write one table row: $euc as at least four hex digits, a tab, then the text
# held in $uni. $uni is passed as a %s argument rather than interpolated into
# the format string, so a stray '%' in the input data can never be misread as
# a printf conversion. Output is unchanged for ordinary "0xNNNN" values.
printf CSGB2312 "0x%04x\t%s\n", $euc, $uni;
93
# Write the trailing comment that marks the end of the generated table.
print CSGB2312 "\n# End of file\n";
98
# Dies with the OS error text ($!) when the preceding expression is false;
# per the message, that is a failed close of the input file GB2312.TXT.
or die "Can't close input file GB2312.TXT! ($!)"
101
# Dies with the OS error text ($!) when the preceding expression is false;
# per the message, that is a failed close of the output file CSGB2312.TXT.
or die "Can't close output file CSGB2312.TXT! ($!)"