~ubuntu-branches/ubuntu/lucid/postgresql-8.4/lucid-proposed

« back to all changes in this revision

Viewing changes to src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-03-20 12:00:13 UTC
  • Revision ID: james.westby@ubuntu.com-20090320120013-hogj7egc5mjncc5g
Tags: upstream-8.4~0cvs20090328
Import upstream version 8.4~0cvs20090328

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 *        ISO8859_1 <--> UTF8
 
4
 *
 
5
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 
6
 * Portions Copyright (c) 1994, Regents of the University of California
 
7
 *
 
8
 * IDENTIFICATION
 
9
 *        $PostgreSQL$
 
10
 *
 
11
 *-------------------------------------------------------------------------
 
12
 */
 
13
 
 
14
#include "postgres.h"
 
15
#include "fmgr.h"
 
16
#include "mb/pg_wchar.h"
 
17
 
 
18
/*
 * Module magic marker for this loadable conversion library.
 * NOTE(review): presumably this is what lets the server verify the library
 * was built against a compatible server version -- confirm in fmgr.h.
 */
PG_MODULE_MAGIC;
 
19
 
 
20
/*
 * Function-manager info records for the two conversion entry points
 * defined in this file.
 */
PG_FUNCTION_INFO_V1(iso8859_1_to_utf8);
 
21
PG_FUNCTION_INFO_V1(utf8_to_iso8859_1);
 
22
 
 
23
/* Prototypes for the two conversion entry points defined in this file. */
extern Datum iso8859_1_to_utf8(PG_FUNCTION_ARGS);
 
24
extern Datum utf8_to_iso8859_1(PG_FUNCTION_ARGS);
 
25
 
 
26
/* ----------
 
27
 * conv_proc(
 
28
 *              INTEGER,        -- source encoding id
 
29
 *              INTEGER,        -- destination encoding id
 
30
 *              CSTRING,        -- source string (null terminated C string)
 
31
 *              CSTRING,        -- destination string (null terminated C string)
 
32
 *              INTEGER         -- source string length
 
33
 * ) returns VOID;
 
34
 * ----------
 
35
 */
 
36
 
 
37
Datum
 
38
iso8859_1_to_utf8(PG_FUNCTION_ARGS)
 
39
{
 
40
        unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
 
41
        unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
 
42
        int                     len = PG_GETARG_INT32(4);
 
43
        unsigned short c;
 
44
 
 
45
        CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);
 
46
 
 
47
        while (len > 0)
 
48
        {
 
49
                c = *src;
 
50
                if (c == 0)
 
51
                        report_invalid_encoding(PG_LATIN1, (const char *) src, len);
 
52
                if (!IS_HIGHBIT_SET(c))
 
53
                        *dest++ = c;
 
54
                else
 
55
                {
 
56
                        *dest++ = (c >> 6) | 0xc0;
 
57
                        *dest++ = (c & 0x003f) | HIGHBIT;
 
58
                }
 
59
                src++;
 
60
                len--;
 
61
        }
 
62
        *dest = '\0';
 
63
 
 
64
        PG_RETURN_VOID();
 
65
}
 
66
 
 
67
Datum
 
68
utf8_to_iso8859_1(PG_FUNCTION_ARGS)
 
69
{
 
70
        unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
 
71
        unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
 
72
        int                     len = PG_GETARG_INT32(4);
 
73
        unsigned short c,
 
74
                                c1;
 
75
 
 
76
        CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_LATIN1);
 
77
 
 
78
        while (len > 0)
 
79
        {
 
80
                c = *src;
 
81
                if (c == 0)
 
82
                        report_invalid_encoding(PG_UTF8, (const char *) src, len);
 
83
                /* fast path for ASCII-subset characters */
 
84
                if (!IS_HIGHBIT_SET(c))
 
85
                {
 
86
                        *dest++ = c;
 
87
                        src++;
 
88
                        len--;
 
89
                }
 
90
                else
 
91
                {
 
92
                        int                     l = pg_utf_mblen(src);
 
93
 
 
94
                        if (l > len || !pg_utf8_islegal(src, l))
 
95
                                report_invalid_encoding(PG_UTF8, (const char *) src, len);
 
96
                        if (l != 2)
 
97
                                report_untranslatable_char(PG_UTF8, PG_LATIN1,
 
98
                                                                                   (const char *) src, len);
 
99
                        c1 = src[1] & 0x3f;
 
100
                        c = ((c & 0x1f) << 6) | c1;
 
101
                        if (c >= 0x80 && c <= 0xff)
 
102
                        {
 
103
                                *dest++ = (unsigned char) c;
 
104
                                src += 2;
 
105
                                len -= 2;
 
106
                        }
 
107
                        else
 
108
                                report_untranslatable_char(PG_UTF8, PG_LATIN1,
 
109
                                                                                   (const char *) src, len);
 
110
                }
 
111
        }
 
112
        *dest = '\0';
 
113
 
 
114
        PG_RETURN_VOID();
 
115
}