3
# Copyright (c) 2001-2005, PostgreSQL Global Development Group
5
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl,v 1.6 2005-01-01 20:44:18 tgl Exp $
7
# Generate UTF-8 <--> EUC_JP code conversion tables from
8
# map files provided by Unicode organization.
9
# Unfortunately it is prohibited by the organization
10
# to distribute the map files. So if you try to use this script,
11
# you have to obtain JIS0201.TXT, JIS0208.TXT, JIS0212.TXT from
12
# the organization's ftp site.
17
# # and Unicode name (not used in this script)
20
# JIS0208 shift-JIS code in hex
23
# # and Unicode name (not used in this script)
28
# # and Unicode name (not used in this script)
32
# first generate UTF-8 --> EUC_JP table
37
# Input for this pass: the JIS0201.TXT mapping file.
# NOTE(review): the bare integer lines in this excerpt look like line-number
# residue from extraction rather than Perl statements -- confirm against the
# original file.
$in_file = "JIS0201.TXT";
39
# Two-argument open on the bareword handle FILE (read mode by default);
# the script dies with the file name if the open fails.
open( FILE, $in_file ) || die( "cannot open $in_file" );
48
# Split the current line ($_) on whitespace and keep its first three fields.
( $c, $u, $rest ) = split;
51
# Only entries whose values are both at or above 0x80 are mapped.
# NOTE(review): $code and $ucs are not assigned in the lines visible here;
# presumably they are the numeric values of $c and $u -- confirm.
if( $code >= 0x80 && $ucs >= 0x0080 ){
52
# ucs2utf is defined outside this excerpt; presumably it returns the UTF-8
# encoding of the code point as an integer -- confirm.
$utf = &ucs2utf($ucs);
53
# A non-empty entry means this UTF-8 key was stored earlier: warn on STDERR.
if( $array{ $utf } ne "" ){
54
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
60
# Store the local code OR-ed with 0x8e00 under the UTF-8 key.
# NOTE(review): 0x8e is presumably the EUC-JP lead byte used for JIS0201
# characters -- confirm.
$array{ $utf } = ($code | 0x8e00);
68
# Input for this pass: the JIS0208.TXT mapping file.
$in_file = "JIS0208.TXT";
70
# Two-argument open on the bareword handle FILE (read mode by default);
# the script dies with the file name if the open fails.
open( FILE, $in_file ) || die( "cannot open $in_file" );
77
# Split the current line ($_) on whitespace and keep its first four fields.
# NOTE(review): $s is presumably the shift-JIS column described in the file
# header comments; it is not used in the lines visible here -- confirm.
( $s, $c, $u, $rest ) = split;
80
# Only entries whose values are both at or above 0x80 are mapped.
# NOTE(review): $code and $ucs are not assigned in the lines visible here;
# presumably they are the numeric values of $c and $u -- confirm.
if( $code >= 0x80 && $ucs >= 0x0080 ){
81
# ucs2utf is defined outside this excerpt; presumably it returns the UTF-8
# encoding of the code point as an integer -- confirm.
$utf = &ucs2utf($ucs);
82
# A non-empty entry means this UTF-8 key was stored earlier: warn on STDERR.
if( $array{ $utf } ne "" ){
83
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
88
# Store the local code with bit 0x80 set in each of its two low bytes.
$array{ $utf } = ($code | 0x8080);
96
# Input for this pass: the JIS0212.TXT mapping file.
$in_file = "JIS0212.TXT";
98
# Two-argument open on the bareword handle FILE (read mode by default);
# the script dies with the file name if the open fails.
open( FILE, $in_file ) || die( "cannot open $in_file" );
105
# Split the current line ($_) on whitespace and keep its first three fields.
( $c, $u, $rest ) = split;
108
# Only entries whose values are both at or above 0x80 are mapped.
# NOTE(review): $code and $ucs are not assigned in the lines visible here;
# presumably they are the numeric values of $c and $u -- confirm.
if( $code >= 0x80 && $ucs >= 0x0080 ){
109
# ucs2utf is defined outside this excerpt; presumably it returns the UTF-8
# encoding of the code point as an integer -- confirm.
$utf = &ucs2utf($ucs);
110
# A non-empty entry means this UTF-8 key was stored earlier: warn on STDERR.
if( $array{ $utf } ne "" ){
111
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
116
# Store the local code with bit 0x80 set in its two low bytes and 0x8f
# OR-ed into the third byte.
# NOTE(review): 0x8f is presumably the EUC-JP lead byte used for JIS0212
# characters -- confirm.
$array{ $utf } = ($code | 0x8f8080);
122
# first, generate UTF8 --> EUC_JP table
125
# Output file that receives the generated UTF-8 --> EUC_JP table.
$file = "utf8_to_euc_jp.map";
126
# "> $file" is a two-argument open for writing (the file is created or
# truncated); the script dies with the file name if the open fails.
open( FILE, "> $file" ) || die( "cannot open $file" );
127
# Emit the opening of the array declaration, with $count interpolated as
# the array size.
# NOTE(review): $count is not assigned in the lines visible here; presumably
# it holds the number of entries collected in %array -- confirm.
print FILE "static pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
129
# Visit the keys of %array in ascending numeric order.
for $index ( sort {$a <=> $b} keys( %array ) ){
130
$code = $array{ $index };
133
# Entry line without a trailing comma. %04x is a minimum width, so values
# wider than four hex digits are printed in full.
# NOTE(review): presumably this form is used only for the final entry; the
# condition choosing between the two printf calls is not visible -- confirm.
printf FILE " {0x%04x, 0x%04x}\n", $index, $code;
135
# Entry line followed by a comma.
printf FILE " {0x%04x, 0x%04x},\n", $index, $code;
143
# then generate EUC_JP --> UTF8 table
149
# Input for this pass: the JIS0201.TXT mapping file, read again for the
# reverse table.
# NOTE(review): %array is used again in this pass; presumably it is cleared
# on a line not visible in this excerpt -- confirm.
$in_file = "JIS0201.TXT";
151
# Two-argument open on the bareword handle FILE (read mode by default);
# the script dies with the file name if the open fails.
open( FILE, $in_file ) || die( "cannot open $in_file" );
160
# Split the current line ($_) on whitespace and keep its first three fields.
( $c, $u, $rest ) = split;
163
# Only entries whose values are both at or above 0x80 are mapped.
# NOTE(review): $code and $ucs are not assigned in the lines visible here;
# presumably they are the numeric values of $c and $u -- confirm.
if( $code >= 0x80 && $ucs >= 0x0080 ){
164
# ucs2utf is defined outside this excerpt; presumably it returns the UTF-8
# encoding of the code point as an integer -- confirm.
$utf = &ucs2utf($ucs);
165
# A non-empty entry means this local code was stored earlier: warn on STDERR.
if( $array{ $code } ne "" ){
166
# NOTE(review): the message says "duplicate code" but the value printed is
# $ucs, not $code -- confirm whether that is intended.
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
173
# Store the UTF-8 value under the local code.
# NOTE(review): any adjustment of $code between the duplicate check and this
# store is not visible in this excerpt -- confirm.
$array{ $code } = $utf;
181
# Input for this pass: the JIS0208.TXT mapping file, read again for the
# reverse table.
$in_file = "JIS0208.TXT";
183
# Two-argument open on the bareword handle FILE (read mode by default);
# the script dies with the file name if the open fails.
open( FILE, $in_file ) || die( "cannot open $in_file" );
190
# Split the current line ($_) on whitespace and keep its first four fields.
# NOTE(review): $s is presumably the shift-JIS column described in the file
# header comments; it is not used in the lines visible here -- confirm.
( $s, $c, $u, $rest ) = split;
193
# Only entries whose values are both at or above 0x80 are mapped.
# NOTE(review): $code and $ucs are not assigned in the lines visible here;
# presumably they are the numeric values of $c and $u -- confirm.
if( $code >= 0x80 && $ucs >= 0x0080 ){
194
# ucs2utf is defined outside this excerpt; presumably it returns the UTF-8
# encoding of the code point as an integer -- confirm.
$utf = &ucs2utf($ucs);
195
# A non-empty entry means this local code was stored earlier: warn on STDERR.
if( $array{ $code } ne "" ){
196
# NOTE(review): the message says "duplicate code" but the value printed is
# $ucs, not $code -- confirm whether that is intended.
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
202
# Store the UTF-8 value under the local code.
# NOTE(review): any adjustment of $code between the duplicate check and this
# store is not visible in this excerpt -- confirm.
$array{ $code } = $utf;
210
# Input for this pass: the JIS0212.TXT mapping file, read again for the
# reverse table.
$in_file = "JIS0212.TXT";
212
# Two-argument open on the bareword handle FILE (read mode by default);
# the script dies with the file name if the open fails.
open( FILE, $in_file ) || die( "cannot open $in_file" );
219
# Split the current line ($_) on whitespace and keep its first three fields.
( $c, $u, $rest ) = split;
222
# Only entries whose values are both at or above 0x80 are mapped.
# NOTE(review): $code and $ucs are not assigned in the lines visible here;
# presumably they are the numeric values of $c and $u -- confirm.
if( $code >= 0x80 && $ucs >= 0x0080 ){
223
# ucs2utf is defined outside this excerpt; presumably it returns the UTF-8
# encoding of the code point as an integer -- confirm.
$utf = &ucs2utf($ucs);
224
# A non-empty entry means this local code was stored earlier: warn on STDERR.
if( $array{ $code } ne "" ){
225
# NOTE(review): the message says "duplicate code" but the value printed is
# $ucs, not $code -- confirm whether that is intended.
printf STDERR "Warning: duplicate code: %04x\n",$ucs;
231
# Store the UTF-8 value under the local code.
# NOTE(review): any adjustment of $code between the duplicate check and this
# store is not visible in this excerpt -- confirm.
$array{ $code } = $utf;
236
# Output file that receives the generated EUC_JP --> UTF-8 table.
$file = "euc_jp_to_utf8.map";
237
# "> $file" is a two-argument open for writing (the file is created or
# truncated); the script dies with the file name if the open fails.
open( FILE, "> $file" ) || die( "cannot open $file" );
238
# Emit the opening of the array declaration, with $count interpolated as
# the array size.
# NOTE(review): $count is not assigned in the lines visible here; presumably
# it holds the number of entries collected in %array -- confirm.
print FILE "static pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
239
# Visit the keys of %array in ascending numeric order.
for $index ( sort {$a <=> $b} keys( %array ) ){
240
$utf = $array{ $index };
243
# Entry line without a trailing comma. %04x is a minimum width, so values
# wider than four hex digits are printed in full.
# NOTE(review): presumably this form is used only for the final entry; the
# condition choosing between the two printf calls is not visible -- confirm.
printf FILE " {0x%04x, 0x%04x}\n", $index, $utf;
245
# Entry line followed by a comma.
printf FILE " {0x%04x, 0x%04x},\n", $index, $utf;