~vcs-imports/mammoth-replicator/trunk

« back to all changes in this revision

Viewing changes to src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl

  • Committer: alvherre
  • Date: 2005-12-16 21:24:52 UTC
  • Revision ID: svn-v4:db760fc0-0f08-0410-9d63-cc6633f64896:trunk:1
Initial import of the REL8_0_3 sources from the Pgsql CVS repository.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#! /usr/bin/perl
 
2
#
 
3
# Copyright (c) 2001-2005, PostgreSQL Global Development Group
 
4
#
 
5
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_cyrillic.pl,v 1.6 2005-01-01 20:44:18 tgl Exp $
 
6
#
 
7
# Generate UTF-8 <--> Cyrillic charset (KOI8-R, CP1251, CP866) code conversion tables from
 
8
# map files provided by Unicode organization.
 
9
# Unfortunately it is prohibited by the organization
 
10
# to distribute the map files. So if you try to use this script,
 
11
# you have to obtain "koi8-r.txt", "cp1251.txt" and "cp866.txt" from the organization's ftp site.
 
12
# We assume each file includes three tab-separated columns:
 
13
#                local charset (KOI8-R / CP1251 / CP866) code in hex
 
14
#                UCS-2 code in hex
 
15
#                # and Unicode name (not used in this script)
 
16
 
 
17
require "ucs2utf.pl";
 
18
%filename = ('KOI8R'=>'koi8-r.txt',
 
19
             'WIN1251'=>'cp1251.txt',
 
20
             'ALT'=>'cp866.txt');
 
21
@charsets = ('KOI8R','ALT','WIN1251');
 
22
foreach $charset (@charsets) {
 
23
 
 
24
#
 
25
# first, generate UTF8->ISO8859 table
 
26
#
 
27
    $in_file = $filename{$charset};
 
28
 
 
29
    open( FILE, $in_file ) || die( "cannot open $in_file" );
 
30
 
 
31
        reset 'array';
 
32
 
 
33
    while( <FILE> ){
 
34
                chop;
 
35
                if( /^#/ ){
 
36
                        next;
 
37
                }
 
38
                ( $c, $u, $rest ) = split;
 
39
                $ucs = hex($u);
 
40
                $code = hex($c);
 
41
                if( $code >= 0x80){
 
42
                        $utf = &ucs2utf($ucs);
 
43
                        if( $array{ $utf } ne "" ){
 
44
                                printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
 
45
                                next;
 
46
                        }
 
47
                        $count++;
 
48
                        $array{ $utf } = $code;
 
49
                }
 
50
        }
 
51
    close( FILE );
 
52
 
 
53
        $file = "utf8_to_${charset}.map";
 
54
    open( FILE, "> $file" ) || die( "cannot open $file" );
 
55
        print FILE "static pg_utf_to_local ULmap_${charset}[ $count ] = {\n";
 
56
 
 
57
        for $index ( sort {$a <=> $b} keys( %array ) ){
 
58
                $code = $array{ $index };
 
59
                $count--;
 
60
                if( $count == 0 ){
 
61
                        printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
 
62
                } else {
 
63
                        printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
 
64
                }
 
65
        }
 
66
 
 
67
        print FILE "};\n";
 
68
        close(FILE);
 
69
 
 
70
#
 
71
# then generate ISO885->UTF8 table
 
72
#
 
73
    open( FILE, $in_file ) || die( "cannot open $in_file" );
 
74
 
 
75
        reset 'array';
 
76
 
 
77
    while( <FILE> ){
 
78
                chop;
 
79
                if( /^#/ ){
 
80
                        next;
 
81
                }
 
82
                ( $c, $u, $rest ) = split;
 
83
                $ucs = hex($u);
 
84
                $code = hex($c);
 
85
                if($code >= 0x80){
 
86
                        $utf = &ucs2utf($ucs);
 
87
                        if( $array{ $utf } ne "" ){
 
88
                                printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
 
89
                                next;
 
90
                        }
 
91
                        $count++;
 
92
                        $array{ $code } = $utf;
 
93
                }
 
94
        }
 
95
    close( FILE );
 
96
 
 
97
        $file = "${charset}_to_utf8.map";
 
98
    open( FILE, "> $file" ) || die( "cannot open $file" );
 
99
        print FILE "static pg_local_to_utf LUmap${charset}[ $count ] = {\n";
 
100
        for $index ( sort {$a <=> $b} keys( %array ) ){
 
101
                $utf = $array{ $index };
 
102
                $count--;
 
103
                if( $count == 0 ){
 
104
                        printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
 
105
                } else {
 
106
                        printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
 
107
                }
 
108
        }
 
109
 
 
110
        print FILE "};\n";
 
111
        close(FILE);
 
112
}