3
# Copyright (c) 2001-2005, PostgreSQL Global Development Group
5
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl,v 1.6 2005-01-01 20:44:18 tgl Exp $
7
# Generate UTF-8 <--> EUC_TW code conversion tables from
8
# map files provided by the Unicode organization.
9
# Unfortunately it is prohibited by the organization
10
# to distribute the map files. So if you try to use this script,
11
# you have to obtain CNS11643.TXT from
12
# the organization's ftp site.
14
# CNS11643.TXT format:
15
# CNS11643 code in hex (3 bytes)
16
# (I guess the first byte means the plane No.)
18
# # and Unicode name (not used in this script)
22
# first generate UTF-8 --> EUC_TW table
24
# Pass 1: read the CNS 11643 mapping file and fill %array, keyed by the
# value returned by ucs2utf() (defined outside this excerpt), with the
# EUC_TW code as the stored value.
# NOTE(review): the read loop, the assignments of $code and $ucs, the
# branch conditions and the closing braces lie on lines not visible in
# this excerpt; the comments below describe only the visible statements.
$in_file = "CNS11643.TXT";
26
open( FILE, $in_file ) || die( "cannot open $in_file" );
33
# Split $_ on whitespace into its first three fields.
( $c, $u, $rest ) = split;
36
# Only entries whose $code and $ucs are both at least 0x80 are considered.
# NOTE(review): presumably $code/$ucs are the hex values of $c/$u -- confirm.
if( $code >= 0x80 && $ucs >= 0x0080 ){
37
$utf = &ucs2utf($ucs);
38
# A non-empty slot means this key has already been stored.
if( $array{ $utf } ne "" ){
39
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
44
# The plane number is taken from bits 16-20 of the CNS code.
$plane = ($code & 0x1f0000) >> 16;
46
printf STDERR "Warning: invalid plane No.$plane. ignored\n";
51
# Two-byte form: low 16 bits of the code with 0x80 set in each byte.
# NOTE(review): the condition selecting this form is not visible here.
$array{ $utf } = (($code & 0xffff) | 0x8080);
53
# Four-byte form: 0x8e, then 0xa0 + plane, then the same two bytes as above.
$array{ $utf } = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
60
# now write the UTF8 --> EUC_TW table to the map file
63
# Write the contents of %array to utf8_to_euc_tw.map as a C array
# initializer of type pg_utf_to_local.
$file = "utf8_to_euc_tw.map";
64
open( FILE, "> $file" ) || die( "cannot open $file" );
65
# $count supplies the array dimension in the emitted declaration; it is
# maintained on lines not visible in this excerpt.
print FILE "static pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
67
# Emit the entries in ascending numeric order of their keys.
for $index ( sort {$a <=> $b} keys( %array ) ){
68
$code = $array{ $index };
71
# Entry without a trailing comma.
# NOTE(review): presumably used for the final entry only -- the selecting
# condition is not visible here.
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
73
# Entry followed by a comma.
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
81
# then generate EUC_TW --> UTF8 table
85
# Pass 2: open the CNS 11643 mapping file again to build the reverse
# (EUC_TW --> UTF-8) table.
# NOTE(review): the read loop, the assignments of $code and $ucs, the
# stores into %array, the branch conditions and the closing braces lie on
# lines not visible in this excerpt; the comments below describe only the
# visible statements.
open( FILE, $in_file ) || die( "cannot open $in_file" );
92
# Split $_ on whitespace into its first three fields.
( $c, $u, $rest ) = split;
95
# Only entries whose $code and $ucs are both at least 0x80 are considered.
if( $code >= 0x80 && $ucs >= 0x0080 ){
96
$utf = &ucs2utf($ucs);
97
# A non-empty slot means this code has already been stored.
if( $array{ $code } ne "" ){
98
# Print $code so the reported value matches the "duplicate code" label
# and the key tested just above.
printf STDERR "Warning: duplicate code: %04x\n",$code;
103
# The plane number is taken from bits 16-20 of the CNS code.
$plane = ($code & 0x1f0000) >> 16;
105
printf STDERR "Warning: invalid plane No.$plane. ignored\n";
110
# Two-byte form: low 16 bits of the code with 0x80 set in each byte.
# NOTE(review): the condition selecting this form is not visible here.
$c = (($code & 0xffff) | 0x8080);
114
# Four-byte form: 0x8e, then 0xa0 + plane, then the same two bytes as above.
$c = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
120
# Write the contents of %array to euc_tw_to_utf8.map as a C array
# initializer of type pg_local_to_utf.
$file = "euc_tw_to_utf8.map";
121
open( FILE, "> $file" ) || die( "cannot open $file" );
122
# $count supplies the array dimension in the emitted declaration; it is
# maintained on lines not visible in this excerpt.
print FILE "static pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
123
# Emit the entries in ascending numeric order of their keys.
for $index ( sort {$a <=> $b} keys( %array ) ){
124
$utf = $array{ $index };
127
# Entry without a trailing comma.
# NOTE(review): presumably used for the final entry only -- the selecting
# condition is not visible here.
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
129
# Entry followed by a comma.
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;