2
* Copyright (C) 1999-2002, 2004-2010 Free Software Foundation, Inc.
3
* This file is part of the GNU LIBICONV Library.
5
* The GNU LIBICONV Library is free software; you can redistribute it
6
* and/or modify it under the terms of the GNU Library General Public
7
* License as published by the Free Software Foundation; either version 2
8
* of the License, or (at your option) any later version.
10
* The GNU LIBICONV Library is distributed in the hope that it will be
11
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
* Library General Public License for more details.
15
* You should have received a copy of the GNU Library General Public
16
* License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17
* If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18
* Fifth Floor, Boston, MA 02110-1301, USA.
21
/* This file defines all the converters. */
24
/* Our own notion of wide character, as UCS-4, according to ISO-10646-1. */
25
/* NOTE(review): UCS-4 values need more than the 16 bits that C guarantees for
   unsigned int; this presumably relies on unsigned int being at least 32 bits
   wide on every supported target -- confirm. */
typedef unsigned int ucs4_t;
27
/* State used by a conversion. 0 denotes the initial state. */
28
/* Per the converter contracts in this file, xxx_flushwc returns a conversion
   to the initial state and xxx_reset stores the shift sequence that does so;
   how each converter encodes its non-zero states is not visible here. */
typedef unsigned int state_t;
30
/* iconv_t is an opaque type. This is the real iconv_t type. */
31
/* Every converter callback declared in this file (xxx_mbtowc, xxx_flushwc,
   xxx_wctomb, xxx_reset) takes a conv_t as its first argument. */
typedef struct conv_struct * conv_t;
34
* Data type for conversion multibyte -> unicode
37
int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n);
39
* int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n)
40
* converts the byte sequence starting at s to a wide character. Up to n bytes
41
* are available at s. n is >= 1.
42
* Result is number of bytes consumed (if a wide character was read),
43
* or -1 if invalid, or -2 if n too small, or -2-2*(number of bytes consumed)
44
* if only a shift sequence was read.
46
int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc);
48
* int xxx_flushwc (conv_t conv, ucs4_t *pwc)
49
* returns to the initial state and stores the pending wide character, if any.
50
* Result is 1 (if a wide character was read) or 0 if none was pending.
54
/* Negative return codes of xxx_mbtowc.
   A non-negative result is the number of bytes consumed.  A negative result
   encodes both a condition and the number n of shift sequence bytes that
   were read before the condition was detected:
     odd values  -1, -3, -5, ...  RET_SHIFT_ILSEQ(n)  invalid input
     even values -2, -4, -6, ...  RET_TOOFEW(n)       n too small
   The DECODE_... macros recover n from such a value. */

/* Return code if invalid input after a shift sequence of n bytes was read.
   (xxx_mbtowc) */
#define RET_SHIFT_ILSEQ(n)  (-1-2*(n))
/* Return code if invalid. (xxx_mbtowc)
   Same as RET_SHIFT_ILSEQ with no shift sequence bytes read, i.e. -1. */
#define RET_ILSEQ           RET_SHIFT_ILSEQ(0)
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc)
   With n == 0 this is the plain "n too small" code, i.e. -2. */
#define RET_TOOFEW(n)       (-2-2*(n))
/* Retrieve the n from the encoded RET_... value.
   r must be a value produced by the matching RET_... macro; the cast applies
   to the difference, which is then halved as an unsigned int. */
#define DECODE_SHIFT_ILSEQ(r)  ((unsigned int)(RET_SHIFT_ILSEQ(0) - (r)) / 2)
#define DECODE_TOOFEW(r)       ((unsigned int)(RET_TOOFEW(0) - (r)) / 2)
66
* Data type for conversion unicode -> multibyte
69
int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, int n);
71
* int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
72
* converts the wide character wc to the character set xxx, and stores the
73
* result beginning at r. Up to n bytes may be written at r. n is >= 1.
74
* Result is number of bytes written, or -1 if invalid, or -2 if n too small.
76
int (*xxx_reset) (conv_t conv, unsigned char *r, int n);
78
* int xxx_reset (conv_t conv, unsigned char *r, int n)
79
* stores a shift sequence returning to the initial state beginning at r.
80
* Up to n bytes may be written at r. n is >= 0.
81
* Result is number of bytes written, or -2 if n too small.
85
/* Return code if invalid. (xxx_wctomb) */
87
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
88
/* Parenthesized, like the other return code macros, so that the negative
   constant stays a single operand wherever the macro is expanded. */
#define RET_TOOSMALL (-2)
91
* Contents of a conversion descriptor.
94
struct loop_funcs lfuncs;
95
/* Input (conversion multibyte -> unicode) */
97
struct mbtowc_funcs ifuncs;
99
/* Output (conversion unicode -> multibyte) */
101
struct wctomb_funcs ofuncs;
104
/* Operation flags */
107
#ifndef LIBICONV_PLUG
108
struct iconv_fallbacks fallbacks;
109
struct iconv_hooks hooks;
114
* Include all the converters.
119
/* General multi-byte encodings */
134
#include "ucs2internal.h"
135
#include "ucs2swapped.h"
136
#include "ucs4internal.h"
137
#include "ucs4swapped.h"
141
/* 8-bit encodings */
142
#include "iso8859_1.h"
143
#include "iso8859_2.h"
144
#include "iso8859_3.h"
145
#include "iso8859_4.h"
146
#include "iso8859_5.h"
147
#include "iso8859_6.h"
148
#include "iso8859_7.h"
149
#include "iso8859_8.h"
150
#include "iso8859_9.h"
151
#include "iso8859_10.h"
152
#include "iso8859_11.h"
153
#include "iso8859_13.h"
154
#include "iso8859_14.h"
155
#include "iso8859_15.h"
156
#include "iso8859_16.h"
173
#include "mac_roman.h"
174
#include "mac_centraleurope.h"
175
#include "mac_iceland.h"
176
#include "mac_croatian.h"
177
#include "mac_romania.h"
178
#include "mac_cyrillic.h"
179
#include "mac_ukraine.h"
180
#include "mac_greek.h"
181
#include "mac_turkish.h"
182
#include "mac_hebrew.h"
183
#include "mac_arabic.h"
184
#include "mac_thai.h"
185
#include "hp_roman8.h"
186
#include "nextstep.h"
187
#include "armscii_8.h"
188
#include "georgian_academy.h"
189
#include "georgian_ps.h"
200
/* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
203
unsigned short indx; /* index into big table */
204
unsigned short used; /* bitmask of used entries */
207
#include "iso646_jp.h"
208
#include "jisx0201.h"
209
#include "jisx0208.h"
210
#include "jisx0212.h"
212
#include "iso646_cn.h"
214
#include "isoir165.h"
215
/*#include "gb12345.h"*/
217
#include "cns11643.h"
221
#include "johab_hangul.h"
223
/* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
228
#include "iso2022_jp.h"
229
#include "iso2022_jp1.h"
230
#include "iso2022_jp2.h"
236
#include "iso2022_cn.h"
237
#include "iso2022_cnext.h"
240
#include "ces_big5.h"
242
#include "big5hkscs1999.h"
243
#include "big5hkscs2001.h"
244
#include "big5hkscs2004.h"
245
#include "big5hkscs2008.h"
250
#include "iso2022_kr.h"
252
/* Encodings used by system dependent locales. */
267
#include "dec_kanji.h"
268
#include "dec_hanyu.h"
290
#include "euc_jisx0213.h"
291
#include "shift_jisx0213.h"
292
#include "iso2022_jp3.h"
293
#include "big5_2003.h"