~ubuntu-branches/ubuntu/feisty/gnupg2/feisty

Committer: Bazaar Package Importer
Author(s): Martin Pitt
Date: 2006-11-24 18:48:23 UTC
mfrom: (1.1.4 upstream)
Revision ID: james.westby@ubuntu.com-20061124184823-17ir9m46tl09n9k4

Tags: 2.0.0-4ubuntu1

* Synchronize to Debian, reapply remaining Ubuntu changes to pristine Debian
  version:
  - Remove libpcsclite-dev, libopensc2-dev build dependencies (they are in
    universe).

files added:
README.SVN

agent/call-pinentry.c

common/common-defs.h

common/convert.c

common/gpgrlhelp.c

common/http.c

common/http.h

common/keyserver.h

common/openpgpdefs.h

common/srv.c

common/srv.h

common/t-convert.c

debian/gnupg-agent.install

debian/gnupg-agent.lintian

debian/gnupg2.info

debian/gnupg2.install

debian/gnupg2.lintian

debian/gpgsm.install

debian/gpgsm.lintian

doc/DETAILS

doc/FAQ

doc/HACKING

doc/KEYSERVER

doc/OpenPGP

doc/TRANSLATE

doc/com-certs.pem

doc/examples

doc/examples/README

doc/examples/scd-event

doc/examples/trustlist.txt

doc/faq.html

doc/faq.raw

doc/gnupg.info-1

doc/gnupg7.texi

doc/instguide.texi

doc/opt-homedir.texi

doc/samplekeys.asc

doc/see-also-note.texi

doc/specify-user-id.texi

doc/texi.css

doc/yat2m.c

include/Makefile.am

include/Makefile.in

jnlib/mischelp.c

keyserver

keyserver/ChangeLog

keyserver/Makefile.am

keyserver/Makefile.in

keyserver/curl-shim.c

keyserver/curl-shim.h

keyserver/gpgkeys_curl.c

keyserver/gpgkeys_finger.c

keyserver/gpgkeys_hkp.c

keyserver/gpgkeys_ldap.c

keyserver/gpgkeys_mailto.in

keyserver/gpgkeys_test.in

keyserver/ksutil.c

keyserver/ksutil.h

keyserver/no-libgcrypt.c

m4/autobuild.m4

m4/gnupg-pth.m4

m4/ldap.m4

m4/libcurl.m4

m4/libusb.m4

m4/readline.m4

m4/tar-ustar.m4

po/be.gmo

po/be.po

po/ca.gmo

po/ca.po

po/cs.gmo

po/cs.po

po/da.gmo

po/da.po

po/el.gmo

po/el.po

po/en@boldquot.gmo

po/en@boldquot.po

po/en@quot.gmo

po/en@quot.po

po/eo.gmo

po/eo.po

po/es.gmo

po/es.po

po/et.gmo

po/et.po

po/fi.gmo

po/fi.po

po/fr.gmo

po/fr.po

po/gl.gmo

po/gl.po

po/hu.gmo

po/hu.po

po/id.gmo

po/id.po

po/it.gmo

po/it.po

po/ja.gmo

po/ja.po

po/nb.gmo

po/nb.po

po/pl.gmo

po/pl.po

po/pt.gmo

po/pt.po

po/pt_BR.gmo

po/pt_BR.po

po/ro.gmo

po/ro.po

po/ru.gmo

po/ru.po

po/sk.gmo

po/sk.po

po/sv.gmo

po/sv.po

po/tr.gmo

po/tr.po

po/zh_CN.gmo

po/zh_CN.po

po/zh_TW.gmo

po/zh_TW.po

tests/openpgp

tests/openpgp/ChangeLog

tests/openpgp/Makefile.am

tests/openpgp/Makefile.in

tests/openpgp/armdetach.test

tests/openpgp/armdetachm.test

tests/openpgp/armencrypt.test

tests/openpgp/armencryptp.test

tests/openpgp/armor.test

tests/openpgp/armsignencrypt.test

tests/openpgp/armsigs.test

tests/openpgp/bug537-test.data.asc

tests/openpgp/clearsig.test

tests/openpgp/conventional-mdc.test

tests/openpgp/conventional.test

tests/openpgp/decrypt-dsa.test

tests/openpgp/decrypt.test

tests/openpgp/defs.inc

tests/openpgp/detach.test

tests/openpgp/detachm.test

tests/openpgp/encrypt-dsa.test

tests/openpgp/encrypt.test

tests/openpgp/encryptp.test

tests/openpgp/genkey1024.test

tests/openpgp/gpg.conf.tmpl

tests/openpgp/mds.test

tests/openpgp/mkdemodirs

tests/openpgp/multisig.test

tests/openpgp/plain-1-pgp.asc

tests/openpgp/plain-1.asc

tests/openpgp/plain-1o.asc

tests/openpgp/plain-2.asc

tests/openpgp/plain-2o.asc

tests/openpgp/plain-3.asc

tests/openpgp/plain-3o.asc

tests/openpgp/pubdemo.asc

tests/openpgp/pubring.asc

tests/openpgp/pubring.pkr.asc

tests/openpgp/seat.test

tests/openpgp/secdemo.asc

tests/openpgp/secring.asc

tests/openpgp/secring.skr.asc

tests/openpgp/signdemokey

tests/openpgp/signencrypt-dsa.test

tests/openpgp/signencrypt.test

tests/openpgp/sigs-dsa.test

tests/openpgp/sigs.test

tests/openpgp/verify.test

tests/openpgp/version.test

tools/clean-sat.c

tools/convert-from-106

tools/gpg-zip.in

tools/gpgsplit.c

tools/lspgpot

tools/mail-signed-keys

tools/make-dns-cert.c

tools/mk-tdata.c

files removed:
README.CVS

agent/query.c

common/maperror.c

debian/distfiles

debian/gnupg-agent.files

debian/gnupg2.files

debian/gpgsm.files

debian/point-to-info.1

debian/postinst

debian/postrm

debian/preinst

doc/assuan.texi

gl/m4/strpbrk.m4

gl/m4/strsep.m4

gl/strpbrk.c

gl/strpbrk.h

gl/strsep.c

gl/strsep.h

include/distfiles

include/errors.h

include/http.h

include/i18n.h

include/keyserver.h

include/memory.h

include/mpi.h

include/util.h

files modified:
AUTHORS

ChangeLog

Makefile.am

Makefile.in

NEWS

README

THANKS

TODO

VERSION

acinclude.m4

aclocal.m4

agent/ChangeLog

agent/Makefile.am

agent/Makefile.in

agent/agent.h

agent/call-scd.c

agent/command-ssh.c

agent/command.c

agent/divert-scd.c

agent/findkey.c

agent/genkey.c

agent/gpg-agent.c

agent/learncard.c

agent/minip12.c

agent/pkdecrypt.c

agent/pksign.c

agent/preset-passphrase.c

agent/protect-tool.c

agent/trustlist.c

autogen.sh

common/ChangeLog

common/Makefile.am

common/Makefile.in

common/asshelp.c

common/b64enc.c

common/estream.c

common/estream.h

common/exechelp.c

common/exechelp.h

common/i18n.h

common/iobuf.c

common/iobuf.h

common/membuf.c

common/membuf.h

common/miscellaneous.c

common/ttyio.c

common/ttyio.h

common/util.h

common/xreadline.c

config.guess

config.h.in

config.sub

configure

configure.ac

debian/changelog

debian/control

debian/copyright

debian/gnupg-agent.xsession

debian/rules

doc/ChangeLog

doc/Makefile.am

doc/Makefile.in

doc/debugging.texi

doc/glossary.texi

doc/gnupg.info

doc/gnupg.texi

doc/gpg-agent.texi

doc/gpg.texi

doc/gpgsm.texi

doc/gpl.texi

doc/scdaemon.texi

doc/stamp-vti

doc/sysnotes.texi

doc/tools.texi

doc/version.texi

g10/ChangeLog

g10/Makefile.am

g10/Makefile.in

g10/armor.c

g10/build-packet.c

g10/call-agent.c

g10/call-agent.h

g10/card-util.c

g10/cipher.c

g10/compress.c

g10/dearmor.c

g10/decrypt.c

g10/delkey.c

g10/encode.c

g10/encr-data.c

g10/exec.c

g10/export.c

g10/getkey.c

g10/gpg.c

g10/gpgv.c

g10/import.c

g10/keydb.c

g10/keydb.h

g10/keyedit.c

g10/keygen.c

g10/keyid.c

g10/keylist.c

g10/keyring.c

g10/keyserver-internal.h

g10/keyserver.c

g10/main.h

g10/mainproc.c

g10/misc.c

g10/openfile.c

g10/options.h

g10/options.skel

g10/packet.h

g10/parse-packet.c

g10/passphrase.c

g10/pkclist.c

g10/plaintext.c

g10/pubkey-enc.c

g10/revoke.c

g10/seckey-cert.c

g10/seskey.c

g10/sign.c

g10/skclist.c

g10/status.c

g10/tdbio.c

g10/verify.c

gl/Makefile.am

gl/Makefile.in

gl/m4/gnulib.m4

include/ChangeLog

jnlib/ChangeLog

jnlib/Makefile.am

jnlib/Makefile.in

jnlib/README

jnlib/argparse.c

jnlib/argparse.h

jnlib/dotlock.c

jnlib/dotlock.h

jnlib/libjnlib-config.h

jnlib/logging.c

jnlib/logging.h

jnlib/mischelp.h

jnlib/stringhelp.c

jnlib/stringhelp.h

jnlib/strlist.c

jnlib/strlist.h

jnlib/types.h

jnlib/utf8conv.c

jnlib/utf8conv.h

jnlib/w32-afunix.c

jnlib/w32-afunix.h

jnlib/w32-pth.c

jnlib/w32-pth.h

kbx/ChangeLog

kbx/Makefile.am

kbx/Makefile.in

kbx/keybox-openpgp.c

kbx/keybox-search-desc.h

kbx/keybox-search.c

m4/ChangeLog

m4/Makefile.am

m4/Makefile.in

m4/ksba.m4

m4/libassuan.m4

po/ChangeLog

po/LINGUAS

po/POTFILES.in

po/de.gmo

po/de.po

po/gnupg2.pot

scd/ChangeLog

scd/Makefile.am

scd/Makefile.in

scd/apdu.c

scd/app-dinsig.c

scd/app-nks.c

scd/app-openpgp.c

scd/app-p15.c

scd/app.c

scd/command.c

scd/iso7816.c

scd/pcsc-wrapper.c

scd/scdaemon.c

scd/scdaemon.h

sm/ChangeLog

sm/Makefile.am

sm/Makefile.in

sm/base64.c

sm/call-agent.c

sm/call-dirmngr.c

sm/certchain.c

sm/certcheck.c

sm/certdump.c

sm/certlist.c

sm/certreqgen.c

sm/delete.c

sm/encrypt.c

sm/export.c

sm/gpgsm.c

sm/gpgsm.h

sm/import.c

sm/keydb.c

sm/keydb.h

sm/keylist.c

sm/qualified.c

sm/server.c

sm/sign.c

sm/verify.c

tests/ChangeLog

tests/Makefile.am

tests/Makefile.in

tests/asschk.c

tools/ChangeLog

tools/Makefile.am

tools/Makefile.in

tools/gpg-connect-agent.c

tools/gpgconf-comp.c

tools/gpgkey2ssh.c

tools/gpgsm-gencert.sh

tools/no-libgcrypt.c

tools/symcryptrun.c

version

Show diffs side-by-side

added added

removed removed

jnlib/utf8conv.c

/* utf8conv.c - UTF8 character set conversion

* 2003 Free Software Foundation, Inc.

* This file is part of GnuPG.

* GnuPG is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* the Free Software Foundation; either version 2 of the License, or

* (at your option) any later version.

* GnuPG is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with this program; if not, write to the Free Software

* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,

* USA.

* 2003, 2006 Free Software Foundation, Inc.

* This file is part of JNLIB.

* JNLIB is free software; you can redistribute it and/or modify it

* under the terms of the GNU Lesser General Public License as

* published by the Free Software Foundation; either version 2.1 of

* the License, or (at your option) any later version.

* JNLIB is distributed in the hope that it will be useful, but

* WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

* Lesser General Public License for more details.

* You should have received a copy of the GNU Lesser General Public

* License along with this program; if not, write to the Free Software

* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

* 02110-1301, USA.

#include <config.h>

#ifdef HAVE_LANGINFO_CODESET

#include <langinfo.h>

#endif

#include <errno.h>

#include <iconv.h>

#include "libjnlib-config.h"

#include "stringhelp.h"

#include "utf8conv.h"

/* Unicode values for the upper half (bytes 0x80..0xff) of the KOI8-R
   character set.  Entry I holds the value for byte 0x80+I.  The table
   is installed as active_charset when the charset "koi8-r" is
   selected and is then indexed with (byte & 0x7f).  */
static ushort koi8_unicode[128] = {

0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,

0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,

0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,

0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,

0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,

0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e,

0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,

0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9,

0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,

0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,

0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,

0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,

0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,

0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,

0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,

0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a

};

/* Unicode values for the upper half (bytes 0x80..0xff) of the
   ISO-8859-2 (Latin-2) character set.  Entry I holds the value for
   byte 0x80+I.  The table is installed as active_charset when the
   charset "8859-2" is selected and is then indexed with
   (byte & 0x7f).  */
static ushort latin2_unicode[128] = {

0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,

0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,

0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,

0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,

0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,

0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,

0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,

0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,

0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,

0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,

0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,

0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,

0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,

0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,

0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,

0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9

};

#ifndef MB_LEN_MAX

#define MB_LEN_MAX 16

#endif

static const char *active_charset_name = "iso-8859-1";

static ushort *active_charset = NULL;

static int no_translation = 0;

static unsigned short *active_charset;

static int no_translation; /* Set to true if we let simply pass through. */

static int use_iconv; /* iconv conversion functions required. */

/* Error handler for iconv failures. This is needed to not clutter the

output with repeated diagnostics about a missing conversion. */

static void

handle_iconv_error (const char *to, const char *from, int use_fallback)

{

if (errno == EINVAL)

{

static int shown1, shown2;

int x;

if (to && !strcmp (to, "utf-8"))

{

x = shown1;

shown1 = 1;

}

else

{

x = shown2;

shown2 = 1;

}

if (!x)

log_info (_("conversion from `%s' to `%s' not available\n"),

from, to);

}

else

{

static int shown;

if (!shown)

log_info (_("iconv_open failed: %s\n"), strerror (errno));

shown = 1;

}

if (use_fallback)

{

/* To avoid further error messages we fallback to Latin-1 for the

native encoding. This is justified as one can expect that on a

utf-8 enabled system nl_langinfo() will work and thus we will

never get to here. Thus Latin-1 seems to be a reasonable

default. */

active_charset_name = "iso-8859-1";

no_translation = 0;

active_charset = NULL;

use_iconv = 0;

}

int

set_native_charset (const char *newset)

100

{

if (!newset)

101

const char *full_newset;

102

103

if (!newset)

104

{

105

#ifdef HABE_W32_SYSTEM

106

static char codepage[30];

107

unsigned int cpno;

108

const char *aliases;

109

110

/* We are a console program thus we need to use the

111

GetConsoleOutputCP function and not the GetACP which

112

would give the codepage for a GUI program. Note this is not

113

a bulletproof detection because GetConsoleCP might return a

114

different one for console input. Not sure how to cope with

115

that. If the console Code page is not known we fall back to

116

the system code page. */

117

cpno = GetConsoleOutputCP ();

118

if (!cpno)

119

cpno = GetACP ();

120

sprintf (codepage, "CP%u", cpno );

121

/* Resolve alias. We use a long string and not the usual

122

array to optimize if the code is taken to a DSO. Taken from

123

libiconv 1.9.2. */

124

newset = codepage;

125

for (aliases = ("CP936" "\0" "GBK" "\0"

126

"CP1361" "\0" "JOHAB" "\0"

127

"CP20127" "\0" "ASCII" "\0"

128

"CP20866" "\0" "KOI8-R" "\0"

129

"CP21866" "\0" "KOI8-RU" "\0"

130

"CP28591" "\0" "ISO-8859-1" "\0"

131

"CP28592" "\0" "ISO-8859-2" "\0"

132

"CP28593" "\0" "ISO-8859-3" "\0"

133

"CP28594" "\0" "ISO-8859-4" "\0"

134

"CP28595" "\0" "ISO-8859-5" "\0"

135

"CP28596" "\0" "ISO-8859-6" "\0"

136

"CP28597" "\0" "ISO-8859-7" "\0"

137

"CP28598" "\0" "ISO-8859-8" "\0"

138

"CP28599" "\0" "ISO-8859-9" "\0"

139

"CP28605" "\0" "ISO-8859-15" "\0"

140

"CP65001" "\0" "UTF-8" "\0");

141

*aliases;

142

aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)

143

{

144

if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))

145

{

146

newset = aliases + strlen (aliases) + 1;

147

break;

148

}

149

}

150

151

#else /*!HAVE_W32_SYSTEM*/

152

153

#ifdef HAVE_LANGINFO_CODESET

newset = nl_langinfo (CODESET);

#else

newset = "8859-1";

#endif

154

newset = nl_langinfo (CODESET);

155

#else /*!HAVE_LANGINFO_CODESET*/

156

/* Try to get the used charset from environment variables. */

157

static char codepage[30];

158

const char *lc, *dot, *mod;

159

160

strcpy (codepage, "iso-8859-1");

161

lc = getenv ("LC_ALL");

162

if (!lc || !*lc)

163

{

164

lc = getenv ("LC_CTYPE");

165

if (!lc || !*lc)

166

lc = getenv ("LANG");

167

}

168

if (lc && *lc)

169

{

170

dot = strchr (lc, '.');

171

if (dot)

172

{

173

mod = strchr (++dot, '@');

174

if (!mod)

175

mod = dot + strlen (dot);

176

if (mod - dot < sizeof codepage && dot != mod)

177

{

178

memcpy (codepage, dot, mod - dot);

179

codepage [mod - dot] = 0;

180

}

181

}

182

}

183

newset = codepage;

184

#endif /*!HAVE_LANGINFO_CODESET*/

185

#endif /*!HAVE_W32_SYSTEM*/

186

}

187

188

full_newset = newset;

189

if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))

190

{

191

newset += 3;

192

if (*newset == '-' || *newset == '_')

newset++;

193

newset++;

194

}

195

if (!*newset

|| !ascii_strcasecmp (newset, "8859-1")

|| !ascii_strcasecmp (newset, "8859-15"))

196

/* Note that we silently assume that plain ASCII is actually meant

197

as Latin-1. This makes sense because many Unix systems don't have

198

their locale set up properly and thus would get annoying error

199

messages and we have to handle all the "bug" reports. Latin-1 has

200

always been the character set used for 8 bit characters on Unix

201

systems. */

202

if ( !*newset

203

|| !ascii_strcasecmp (newset, "8859-1" )

204

|| !ascii_strcasecmp (newset, "646" )

205

|| !ascii_strcasecmp (newset, "ASCII" )

206

|| !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )

207

)

100

208

{

101

209

active_charset_name = "iso-8859-1";

102

210

no_translation = 0;

103

211

active_charset = NULL;

104

}

105

else if (!ascii_strcasecmp (newset, "8859-2"))

106

{

107

active_charset_name = "iso-8859-2";

108

no_translation = 0;

109

active_charset = latin2_unicode;

110

}

111

else if (!ascii_strcasecmp (newset, "koi8-r"))

112

{

113

active_charset_name = "koi8-r";

114

no_translation = 0;

115

active_charset = koi8_unicode;

116

}

117

else if (!ascii_strcasecmp (newset, "utf8")

118

|| !ascii_strcasecmp (newset, "utf-8"))

212

use_iconv = 0;

213

}

214

else if ( !ascii_strcasecmp (newset, "utf8" )

215

|| !ascii_strcasecmp(newset, "utf-8") )

119

216

{

120

217

active_charset_name = "utf-8";

121

218

no_translation = 1;

122

219

active_charset = NULL;

220

use_iconv = 0;

123

221

}

124

222

else

125

return -1;

223

{

224

iconv_t cd;

225

226

#ifdef HAVE_W32_SYSTEM

227

if (load_libiconv ())

228

return -1;

229

#endif /*HAVE_W32_SYSTEM*/

230

231

cd = iconv_open (full_newset, "utf-8");

232

if (cd == (iconv_t)-1)

233

{

234

handle_iconv_error (full_newset, "utf-8", 0);

235

return -1;

236

}

237

iconv_close (cd);

238

cd = iconv_open ("utf-8", full_newset);

239

if (cd == (iconv_t)-1)

240

{

241

handle_iconv_error ("utf-8", full_newset, 0);

242

return -1;

243

}

244

iconv_close (cd);

245

active_charset_name = full_newset;

246

no_translation = 0;

247

active_charset = NULL;

248

use_iconv = 1;

249

}

126

250

return 0;

127

251

}

128

252

132

256

return active_charset_name;

133

257

}

134

258

135

/****************

136

* Convert string, which is in native encoding to UTF8 and return the

137

* new allocated UTF8 string.

138

259

260

/* Convert string, which is in native encoding to UTF8 and return a

261

new allocated UTF-8 string. */

139

262

char *

140

263

native_to_utf8 (const char *orig_string)

141

264

{

147

270

148

271

if (no_translation)

149

272

{

273

/* Already utf-8 encoded. */

150

274

buffer = jnlib_xstrdup (orig_string);

151

275

}

152

else if (active_charset)

153

{

154

for (s = string; *s; s++)

155

{

156

length++;

157

if (*s & 0x80)

158

length += 2; /* we may need 3 bytes */

159

}

160

buffer = jnlib_xmalloc (length + 1);

161

for (p = (unsigned char *)buffer, s = string; *s; s++)

162

{

163

if ((*s & 0x80))

164

{

165

ushort val = active_charset[*s & 0x7f];

166

if (val < 0x0800)

167

{

168

*p++ = 0xc0 | ((val >> 6) & 0x1f);

169

*p++ = 0x80 | (val & 0x3f);

170

}

171

else

172

{

173

*p++ = 0xe0 | ((val >> 12) & 0x0f);

174

*p++ = 0x80 | ((val >> 6) & 0x3f);

175

*p++ = 0x80 | (val & 0x3f);

176

}

177

}

178

else

179

*p++ = *s;

180

}

181

*p = 0;

182

}

183

else

184

{

276

else if (!active_charset && !use_iconv)

277

{

278

/* For Latin-1 we can avoid the iconv overhead. */

185

279

for (s = string; *s; s++)

186

280

{

187

281

length++;

191

285

buffer = jnlib_xmalloc (length + 1);

192

286

for (p = (unsigned char *)buffer, s = string; *s; s++)

193

287

{

194

if (*s & 0x80)

288

if ( (*s & 0x80 ))

195

289

{

196

290

*p++ = 0xc0 | ((*s >> 6) & 3);

197

291

*p++ = 0x80 | (*s & 0x3f);

201

295

}

202

296

*p = 0;

203

297

}

298

else

299

{

300

/* Need to use iconv. */

301

iconv_t cd;

302

const char *inptr;

303

char *outptr;

304

size_t inbytes, outbytes;

305

306

cd = iconv_open ("utf-8", active_charset_name);

307

if (cd == (iconv_t)-1)

308

{

309

handle_iconv_error ("utf-8", active_charset_name, 1);

310

return native_to_utf8 (string);

311

}

312

313

for (s=string; *s; s++ )

314

{

315

length++;

316

if ((*s & 0x80))

317

length += 5; /* We may need up to 6 bytes for the utf8 output. */

318

}

319

buffer = jnlib_xmalloc (length + 1);

320

321

inptr = string;

322

inbytes = strlen (string);

323

outptr = buffer;

324

outbytes = length;

325

if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,

326

&outptr, &outbytes) == (size_t)-1)

327

{

328

static int shown;

329

330

if (!shown)

331

log_info (_("conversion from `%s' to `%s' failed: %s\n"),

332

active_charset_name, "utf-8", strerror (errno));

333

shown = 1;

334

/* We don't do any conversion at all but use the strings as is. */

335

strcpy (buffer, string);

336

}

337

else /* Success. */

338

{

339

*outptr = 0;

340

/* We could realloc the buffer now but I doubt that it makes

341

much sense given that it will get freed anyway soon

342

after. */

343

}

344

iconv_close (cd);

345

}

204

346

return buffer;

205

347

}

206

348

207

349

208

/* Convert string, which is in UTF8 to native encoding. Replace

209

* illegal encodings by some "\xnn" and quote all control

210

* characters. A character with value DELIM will always be quoted, it

211

* must be a vanilla ASCII character. */

212

char *

213

utf8_to_native (const char *string, size_t length, int delim)

350

351

static char *

352

do_utf8_to_native (const char *string, size_t length, int delim,

353

int with_iconv)

214

354

{

215

355

int nleft;

216

356

int i;

217

357

unsigned char encbuf[8];

218

358

int encidx;

219

const byte *s;

359

const unsigned char *s;

220

360

size_t n;

221

361

char *buffer = NULL;

222

362

char *p = NULL;

224

364

size_t slen;

225

365

int resync = 0;

226

366

227

/* 1. pass (p==NULL): count the extended utf-8 characters */

228

/* 2. pass (p!=NULL): create string */

367

/* First pass (p==NULL): count the extended utf-8 characters. */

368

/* Second pass (p!=NULL): create string. */

229

369

for (;;)

230

370

{

231

371

for (slen = length, nleft = encidx = 0, n = 0,

232

s = (const unsigned char *)string; slen;

372

s = (const unsigned char *)string;

373

slen;

233

374

s++, slen--)

234

375

{

235

376

if (resync)

236

377

{

237

378

if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))

238

379

{

239

/* still invalid */

380

/* Still invalid. */

240

381

if (p)

241

382

{

242

383

sprintf (p, "\\x%02x", *s);

250

391

if (!nleft)

251

392

{

252

393

if (!(*s & 0x80))

253

{ /* plain ascii */

254

if (*s < 0x20 || *s == 0x7f || *s == delim ||

255

(delim && *s == '\\'))

394

{

395

/* Plain ascii. */

396

if ( delim != -1

397

&& (*s < 0x20 || *s == 0x7f || *s == delim

398

|| (delim && *s == '\\')))

256

399

{

257

400

n++;

258

401

if (p)

259

402

*p++ = '\\';

260

403

switch (*s)

261

404

{

262

case '\n':

263

n++;

264

if (p)

265

*p++ = 'n';

266

break;

267

case '\r':

268

n++;

269

if (p)

270

*p++ = 'r';

271

break;

272

case '\f':

273

n++;

274

if (p)

275

*p++ = 'f';

276

break;

277

case '\v':

278

n++;

279

if (p)

280

*p++ = 'v';

281

break;

282

case '\b':

283

n++;

284

if (p)

285

*p++ = 'b';

286

break;

287

case 0:

288

n++;

289

if (p)

290

*p++ = '0';

291

break;

405

case '\n': n++; if ( p ) *p++ = 'n'; break;

406

case '\r': n++; if ( p ) *p++ = 'r'; break;

407

case '\f': n++; if ( p ) *p++ = 'f'; break;

408

case '\v': n++; if ( p ) *p++ = 'v'; break;

409

case '\b': n++; if ( p ) *p++ = 'b'; break;

410

case 0: n++; if ( p ) *p++ = '0'; break;

292

411

default:

293

412

n += 3;

294

413

if (p)

306

425

n++;

307

426

}

308

427

}

309

else if ((*s & 0xe0) == 0xc0)

310

{ /* 110x xxxx */

428

else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */

429

{

311

430

val = *s & 0x1f;

312

431

nleft = 1;

313

432

encidx = 0;

314

433

encbuf[encidx++] = *s;

315

434

}

316

else if ((*s & 0xf0) == 0xe0)

317

{ /* 1110 xxxx */

435

else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */

436

{

318

437

val = *s & 0x0f;

319

438

nleft = 2;

320

439

encidx = 0;

321

440

encbuf[encidx++] = *s;

322

441

}

323

else if ((*s & 0xf8) == 0xf0)

324

{ /* 1111 0xxx */

442

else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */

443

{

325

444

val = *s & 0x07;

326

445

nleft = 3;

327

446

encidx = 0;

328

447

encbuf[encidx++] = *s;

329

448

}

330

else if ((*s & 0xfc) == 0xf8)

331

{ /* 1111 10xx */

449

else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */

450

{

332

451

val = *s & 0x03;

333

452

nleft = 4;

334

453

encidx = 0;

335

454

encbuf[encidx++] = *s;

336

455

}

337

else if ((*s & 0xfe) == 0xfc)

338

{ /* 1111 110x */

456

else if ((*s & 0xfe) == 0xfc) /* 1111 110x */

457

{

339

458

val = *s & 0x01;

340

459

nleft = 5;

341

460

encidx = 0;

342

461

encbuf[encidx++] = *s;

343

462

}

344

else

345

{ /* invalid encoding: print as \xnn */

463

else /* Invalid encoding: print as \xNN. */

464

{

346

465

if (p)

347

466

{

348

467

sprintf (p, "\\x%02x", *s);

352

471

resync = 1;

353

472

}

354

473

}

355

else if (*s < 0x80 || *s >= 0xc0)

356

{ /* invalid */

474

else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */

475

{

357

476

if (p)

358

477

{

359

478

for (i = 0; i < encidx; i++)

374

493

encbuf[encidx++] = *s;

375

494

val <<= 6;

376

495

val |= *s & 0x3f;

377

if (!--nleft)

378

{ /* ready */

496

if (!--nleft) /* Ready. */

497

{

379

498

if (no_translation)

380

499

{

381

500

if (p)

386

505

n += encidx;

387

506

encidx = 0;

388

507

}

389

else if (active_charset)

390

{ /* table lookup */

391

for (i = 0; i < 128; i++)

392

{

393

if (active_charset[i] == val)

394

break;

395

}

396

if (i < 128)

397

{ /* we can print this one */

398

if (p)

399

*p++ = i + 128;

400

n++;

401

}

402

else

403

{ /* we do not have a translation: print utf8 */

404

if (p)

405

{

406

for (i = 0; i < encidx; i++)

407

{

408

sprintf (p, "\\x%02x", encbuf[i]);

409

p += 4;

410

}

411

}

412

n += encidx * 4;

413

encidx = 0;

414

}

415

}

416

else

417

{ /* native set */

508

else if (with_iconv)

509

{

510

/* Our strategy for using iconv is a bit strange

511

but it better keeps compatibility with

512

previous versions in regard to how invalid

513

encodings are displayed. What we do is to

514

keep the utf-8 as is and have the real

515

translation step then at the end. Yes, I

516

know that this is ugly. However we are short

517

of the 1.4 release and for this branch we

518

should not mess too much around with iconv

519

things. One reason for this is that we don't

520

know enough about non-GNU iconv

521

implementation and want to minimize the risk

522

of breaking the code on too many platforms. */

523

if ( p )

524

{

525

for (i=0; i < encidx; i++ )

526

*p++ = encbuf[i];

527

}

528

n += encidx;

529

encidx = 0;

530

}

531

else /* Latin-1 case. */

532

{

418

533

if (val >= 0x80 && val < 256)

419

534

{

420

n++; /* we can simply print this character */

535

/* We can simply print this character */

536

n++;

421

537

if (p)

422

538

*p++ = val;

423

539

}

424

540

else

425

{ /* we do not have a translation: print utf8 */

541

{

542

/* We do not have a translation: print utf8. */

426

543

if (p)

427

544

{

428

545

for (i = 0; i < encidx; i++)

440

557

}

441

558

}

442

559

if (!buffer)

443

{ /* allocate the buffer after the first pass */

560

{

561

/* Allocate the buffer after the first pass. */

444

562

buffer = p = jnlib_xmalloc (n + 1);

445

563

}

446

else

564

else if (with_iconv)

565

{

566

/* Note: See above for comments. */

567

iconv_t cd;

568

const char *inptr;

569

char *outbuf, *outptr;

570

size_t inbytes, outbytes;

571

572

*p = 0; /* Terminate the buffer. */

573

574

cd = iconv_open (active_charset_name, "utf-8");

575

if (cd == (iconv_t)-1)

576

{

577

handle_iconv_error (active_charset_name, "utf-8", 1);

578

jnlib_free (buffer);

579

return utf8_to_native (string, length, delim);

580

}

581

582

/* Allocate a new buffer large enough to hold all possible

583

encodings. */

584

n = p - buffer + 1;

585

inbytes = n - 1;;

586

inptr = buffer;

587

outbytes = n * MB_LEN_MAX;

588

if (outbytes / MB_LEN_MAX != n)

589

BUG (); /* Actually an overflow. */

590

outbuf = outptr = jnlib_xmalloc (outbytes);

591

if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,

592

&outptr, &outbytes) == (size_t)-1)

593

{

594

static int shown;

595

596

if (!shown)

597

log_info (_("conversion from `%s' to `%s' failed: %s\n"),

598

"utf-8", active_charset_name, strerror (errno));

599

shown = 1;

600

/* Didn't work out. Try again but without iconv. */

601

jnlib_free (buffer);

602

buffer = NULL;

603

jnlib_free (outbuf);

604

outbuf = do_utf8_to_native (string, length, delim, 0);

605

}

606

else /* Success. */

607

{

608

*outptr = 0; /* Make sure it is a string. */

609

/* We could realloc the buffer now but I doubt that it

610

makes much sense given that it will get freed

611

anyway soon after. */

612

jnlib_free (buffer);

613

}

614

iconv_close (cd);

615

return outbuf;

616

}

617

else /* Not using iconv. */

447

618

{

448

*p = 0; /* make a string */

619

*p = 0; /* Make sure it is a string. */

449

620

return buffer;

450

621

}

451

622

}

452

623

}

624

625

/* Convert string, which is in UTF-8 to native encoding. Replace

626

illegal encodings by some "\xnn" and quote all control

627

characters. A character with value DELIM will always be quoted, it

628

must be a vanilla ASCII character. A DELIM value of -1 is special:

629

it disables all quoting of control characters. */

630

char *

631

utf8_to_native (const char *string, size_t length, int delim)

632

{

633

return do_utf8_to_native (string, length, delim, use_iconv);

634

}

Older »