27
26
#ifdef HAVE_LANGINFO_CODESET
28
27
#include <langinfo.h>
30
#ifndef HAVE_W32_SYSTEM
31
34
#include "libjnlib-config.h"
32
35
#include "stringhelp.h"
33
37
#include "utf8conv.h"
36
static ushort koi8_unicode[128] = {
37
0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,
38
0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
39
0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,
40
0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,
41
0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
42
0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e,
43
0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
44
0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9,
45
0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
46
0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
47
0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
48
0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,
49
0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
50
0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
51
0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
52
0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a
55
static ushort latin2_unicode[128] = {
56
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
57
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
58
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
59
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
60
0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
61
0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
62
0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
63
0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
64
0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
65
0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
66
0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
67
0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
68
0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
69
0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
70
0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
71
0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
75
43
static const char *active_charset_name = "iso-8859-1";
76
static ushort *active_charset = NULL;
77
static int no_translation = 0;
44
static unsigned short *active_charset;
45
static int no_translation; /* Set to true if we simply let the data pass through. */
46
static int use_iconv; /* iconv conversion functions required. */
49
/* Under W32 we dlopen the iconv dll and don't require any iconv
50
related headers at all. However we need to define some stuff. */
51
#ifdef HAVE_W32_SYSTEM
52
typedef void *iconv_t;
54
#define ICONV_CONST const
56
static iconv_t (* __stdcall iconv_open) (const char *tocode,
57
const char *fromcode);
58
static size_t (* __stdcall iconv) (iconv_t cd,
59
const char **inbuf, size_t *inbytesleft,
60
char **outbuf, size_t *outbytesleft);
61
static int (* __stdcall iconv_close) (iconv_t cd);
72
done = 1; /* Do it right now because we might get called recursively
75
handle = dlopen ("iconv.dll", RTLD_LAZY);
78
iconv_open = dlsym (handle, "libiconv_open");
80
iconv = dlsym (handle, "libiconv");
82
iconv_close = dlsym (handle, "libiconv_close");
84
if (!handle || !iconv_close)
86
log_info (_("error loading `%s': %s\n"),
87
"iconv.dll", dlerror ());
88
log_info (_("please see %s for more information\n"),
89
"http://www.gnupg.org/download/iconv.html");
97
return iconv_open? 0: -1;
99
#endif /*HAVE_W32_SYSTEM*/
102
/* Error handler for iconv failures. This is needed to not clutter the
103
output with repeated diagnostics about a missing conversion. */
105
handle_iconv_error (const char *to, const char *from, int use_fallback)
109
static int shown1, shown2;
112
if (to && !strcmp (to, "utf-8"))
124
log_info (_("conversion from `%s' to `%s' not available\n"),
132
log_info (_("iconv_open failed: %s\n"), strerror (errno));
138
/* To avoid further error messages we fall back to Latin-1 for the
139
native encoding. This is justified as one can expect that on a
140
utf-8 enabled system nl_langinfo() will work and thus we won't
141
ever get to here. Thus Latin-1 seems to be a reasonable
143
active_charset_name = "iso-8859-1";
145
active_charset = NULL;
80
153
set_native_charset (const char *newset)
155
const char *full_newset;
159
#ifdef HAVE_W32_SYSTEM
160
static char codepage[30];
164
/* We are a console program thus we need to use the
165
GetConsoleOutputCP function and not the GetACP which
166
would give the codepage for a GUI program. Note this is not
167
a bulletproof detection because GetConsoleCP might return a
168
different one for console input. Not sure how to cope with
169
that. If the console Code page is not known we fall back to
170
the system code page. */
171
cpno = GetConsoleOutputCP ();
174
sprintf (codepage, "CP%u", cpno );
175
/* Resolve alias. We use a long string and not the usual
176
array to optimize if the code is taken to a DSO. Taken from
179
for (aliases = ("CP936" "\0" "GBK" "\0"
180
"CP1361" "\0" "JOHAB" "\0"
181
"CP20127" "\0" "ASCII" "\0"
182
"CP20866" "\0" "KOI8-R" "\0"
183
"CP21866" "\0" "KOI8-RU" "\0"
184
"CP28591" "\0" "ISO-8859-1" "\0"
185
"CP28592" "\0" "ISO-8859-2" "\0"
186
"CP28593" "\0" "ISO-8859-3" "\0"
187
"CP28594" "\0" "ISO-8859-4" "\0"
188
"CP28595" "\0" "ISO-8859-5" "\0"
189
"CP28596" "\0" "ISO-8859-6" "\0"
190
"CP28597" "\0" "ISO-8859-7" "\0"
191
"CP28598" "\0" "ISO-8859-8" "\0"
192
"CP28599" "\0" "ISO-8859-9" "\0"
193
"CP28605" "\0" "ISO-8859-15" "\0"
194
"CP65001" "\0" "UTF-8" "\0");
196
aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
198
if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
200
newset = aliases + strlen (aliases) + 1;
205
#else /*!HAVE_W32_SYSTEM*/
83
207
#ifdef HAVE_LANGINFO_CODESET
84
newset = nl_langinfo (CODESET);
208
newset = nl_langinfo (CODESET);
209
#else /*!HAVE_LANGINFO_CODESET*/
210
/* Try to get the used charset from environment variables. */
211
static char codepage[30];
212
const char *lc, *dot, *mod;
214
strcpy (codepage, "iso-8859-1");
215
lc = getenv ("LC_ALL");
218
lc = getenv ("LC_CTYPE");
220
lc = getenv ("LANG");
224
dot = strchr (lc, '.');
227
mod = strchr (++dot, '@');
229
mod = dot + strlen (dot);
230
if (mod - dot < sizeof codepage && dot != mod)
232
memcpy (codepage, dot, mod - dot);
233
codepage [mod - dot] = 0;
238
#endif /*!HAVE_LANGINFO_CODESET*/
239
#endif /*!HAVE_W32_SYSTEM*/
242
full_newset = newset;
89
243
if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))
92
246
if (*newset == '-' || *newset == '_')
97
|| !ascii_strcasecmp (newset, "8859-1")
98
|| !ascii_strcasecmp (newset, "8859-15"))
250
/* Note that we silently assume that plain ASCII is actually meant
251
as Latin-1. This makes sense because many Unix systems don't have
252
their locale set up properly and thus would get annoying error
253
messages and we have to handle all the "bug" reports. Latin-1 has
254
always been the character set used for 8 bit characters on Unix
257
|| !ascii_strcasecmp (newset, "8859-1" )
258
|| !ascii_strcasecmp (newset, "646" )
259
|| !ascii_strcasecmp (newset, "ASCII" )
260
|| !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
100
263
active_charset_name = "iso-8859-1";
101
264
no_translation = 0;
102
265
active_charset = NULL;
104
else if (!ascii_strcasecmp (newset, "8859-2"))
106
active_charset_name = "iso-8859-2";
108
active_charset = latin2_unicode;
110
else if (!ascii_strcasecmp (newset, "koi8-r"))
112
active_charset_name = "koi8-r";
114
active_charset = koi8_unicode;
116
else if (!ascii_strcasecmp (newset, "utf8")
117
|| !ascii_strcasecmp (newset, "utf-8"))
268
else if ( !ascii_strcasecmp (newset, "utf8" )
269
|| !ascii_strcasecmp(newset, "utf-8") )
119
271
active_charset_name = "utf-8";
120
272
no_translation = 1;
121
273
active_charset = NULL;
280
#ifdef HAVE_W32_SYSTEM
281
if (load_libiconv ())
283
#endif /*HAVE_W32_SYSTEM*/
285
cd = iconv_open (full_newset, "utf-8");
286
if (cd == (iconv_t)-1)
288
handle_iconv_error (full_newset, "utf-8", 0);
292
cd = iconv_open ("utf-8", full_newset);
293
if (cd == (iconv_t)-1)
295
handle_iconv_error ("utf-8", full_newset, 0);
299
active_charset_name = full_newset;
301
active_charset = NULL;
442
{ /* allocate the buffer after the first pass */
622
/* Allocate the buffer after the first pass. */
443
623
buffer = p = jnlib_xmalloc (n + 1);
627
/* Note: See above for comments. */
630
char *outbuf, *outptr;
631
size_t inbytes, outbytes;
633
*p = 0; /* Terminate the buffer. */
635
cd = iconv_open (active_charset_name, "utf-8");
636
if (cd == (iconv_t)-1)
638
handle_iconv_error (active_charset_name, "utf-8", 1);
640
return utf8_to_native (string, length, delim);
643
/* Allocate a new buffer large enough to hold all possible
648
outbytes = n * MB_LEN_MAX;
649
if (outbytes / MB_LEN_MAX != n)
650
BUG (); /* Actually an overflow. */
651
outbuf = outptr = jnlib_xmalloc (outbytes);
652
if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
653
&outptr, &outbytes) == (size_t)-1)
658
log_info (_("conversion from `%s' to `%s' failed: %s\n"),
659
"utf-8", active_charset_name, strerror (errno));
661
/* Didn't work out. Try again but without iconv. */
665
outbuf = do_utf8_to_native (string, length, delim, 0);
669
*outptr = 0; /* Make sure it is a string. */
670
/* We could realloc the buffer now but I doubt that it
671
makes much sense given that it will get freed
672
anyway soon after. */
678
else /* Not using iconv. */
447
*p = 0; /* make a string */
680
*p = 0; /* Make sure it is a string. */
686
/* Convert string, which is in UTF-8 to native encoding. Replace
687
illegal encodings by some "\xnn" and quote all control
688
characters. A character with value DELIM will always be quoted, it
689
must be a vanilla ASCII character. A DELIM value of -1 is special:
690
it disables all quoting of control characters. */
692
utf8_to_native (const char *string, size_t length, int delim)
694
return do_utf8_to_native (string, length, delim, use_iconv);
700
/* Wrapper function for iconv_open, required for W32 as we dlopen that
701
library on that system. */
703
jnlib_iconv_open (const char *tocode, const char *fromcode)
705
#ifdef HAVE_W32_SYSTEM
706
if (load_libiconv ())
707
return (jnlib_iconv_t)(-1);
708
#endif /*HAVE_W32_SYSTEM*/
710
return (jnlib_iconv_t)iconv_open (tocode, fromcode);
714
/* Wrapper function for iconv, required for W32 as we dlopen that
715
library on that system. */
717
jnlib_iconv (jnlib_iconv_t cd,
718
const char **inbuf, size_t *inbytesleft,
719
char **outbuf, size_t *outbytesleft)
722
#ifdef HAVE_W32_SYSTEM
723
if (load_libiconv ())
725
#endif /*HAVE_W32_SYSTEM*/
727
return iconv ((iconv_t)cd, (char**)inbuf, inbytesleft, outbuf, outbytesleft);
730
/* Wrapper function for iconv_close, required for W32 as we dlopen that
731
library on that system. */
733
jnlib_iconv_close (jnlib_iconv_t cd)
735
#ifdef HAVE_W32_SYSTEM
736
if (load_libiconv ())
738
#endif /*HAVE_W32_SYSTEM*/
740
return iconv_close ((iconv_t)cd);