~ubuntu-branches/ubuntu/precise/gnupg2/precise-updates

« back to all changes in this revision

Viewing changes to jnlib/utf8conv.c

Committer: Bazaar Package Importer
Author(s): Andreas Mueller
Date: 2005-03-29 10:30:32 UTC
Revision ID: james.westby@ubuntu.com-20050329103032-sj42n2ain3ipx310

Tags: upstream-1.9.15

Import upstream version 1.9.15

files added:

ABOUT-NLS

AUTHORS

COPYING

ChangeLog

INSTALL

Makefile.am

NEWS

README

README.CVS

THANKS

TODO

acinclude.m4

agent

agent/ChangeLog

agent/Makefile.am

agent/agent.h

agent/cache.c

agent/call-scd.c

agent/command.c

agent/divert-scd.c

agent/findkey.c

agent/genkey.c

agent/gpg-agent.c

agent/keyformat.txt

agent/learncard.c

agent/minip12.c

agent/minip12.h

agent/pkdecrypt.c

agent/pksign.c

agent/preset-passphrase.c

agent/protect-tool.c

agent/protect.c

agent/query.c

agent/trans.c

agent/trustlist.c

am/cmacros.am

artwork

artwork/README

artwork/ascii-key

artwork/gnu-head-sm.jpg

artwork/gnu1.jpg

artwork/gnu5.png

artwork/gnud.png

artwork/gnue.png

artwork/gnue2.png

artwork/gnupg-1.0-available-now.png

artwork/gnupg-1.0-ist-da.png

artwork/gnupg-badge-openpgp.cdr.gz

artwork/gnupg-badge-openpgp.eps.gz

artwork/gnupg-badge-openpgp.jpg

artwork/gnupg-logo-black.jpg

artwork/gnupg-logo-cia.jpg

artwork/gnupg-logo-cia2.jpg

artwork/gnurieg.png

artwork/gpglogo.eps.gz

artwork/gpglogo.fig

artwork/gpglogo.jpg

artwork/tl-gnu1.jpg

autogen.sh

build-w32.sh

cipher

cipher/ChangeLog

cipher/algorithms.h

cipher/blowfish.c

cipher/cast5.c

cipher/cipher.c

cipher/des.c

cipher/idea-stub.c

cipher/md.c

cipher/rijndael.c

cipher/rndunix.c

cipher/rsa.c

cipher/twofish.c

common

common/ChangeLog

common/Makefile.am

common/README

common/asshelp.c

common/asshelp.h

common/b64enc.c

common/dynload.h

common/errors.h

common/exechelp.c

common/exechelp.h

common/fseeko.c

common/ftello.c

common/gettime.c

common/homedir.c

common/i18n.h

common/iobuf.c

common/iobuf.h

common/isascii.c

common/maperror.c

common/membuf.c

common/membuf.h

common/miscellaneous.c

common/mkdtemp.c

common/mkerrors

common/mkerrtok

common/putc_unlocked.c

common/sexp-parse.h

common/signal.c

common/simple-gettext.c

common/simple-pwquery.c

common/simple-pwquery.h

common/strsep.c

common/sysutils.c

common/sysutils.h

common/ttyio.c

common/ttyio.h

common/ttyname.c

common/util.h

common/vasprintf.c

common/w32reg.c

common/xasprintf.c

common/xreadline.c

common/yesno.c

configure.ac

doc/ChangeLog

doc/DETAILS

doc/Makefile.am

doc/Notes

doc/TRANSLATE

doc/assuan.texi

doc/contrib.texi

doc/debugging.texi

doc/fdl.texi

doc/glossary.texi

doc/gnupg-badge-openpgp.eps

doc/gnupg-badge-openpgp.jpg

doc/gnupg.texi

doc/gpg-agent.texi

doc/gpg.ru.sgml

doc/gpg.sgml

doc/gpg.texi

doc/gpgsm.texi

doc/gpgv.texi

doc/gpl.texi

doc/highlights-1.4.txt

doc/mksamplekeys

doc/samplekeys.asc

doc/scdaemon.texi

doc/sysnotes.texi

doc/tools.texi

g10/ChangeLog

g10/Makefile.am

g10/OPTIONS

g10/apdu.c

g10/apdu.h

g10/app-common.h

g10/app-openpgp.c

g10/armor.c

g10/build-packet.c

g10/call-agent.c

g10/call-agent.h

g10/card-util.c

g10/cardglue.c

g10/cardglue.h

g10/ccid-driver.c

g10/ccid-driver.h

g10/cipher.c

g10/comment.c

g10/compress.c

g10/dearmor.c

g10/decrypt.c

g10/delkey.c

g10/encode.c

g10/encr-data.c

g10/exec.c

g10/exec.h

g10/export.c

g10/filter.h

g10/free-packet.c

g10/g10.c

g10/getkey.c

g10/global.h

g10/gpg.h

g10/gpgv.c

g10/helptext.c

g10/import.c

g10/iso7816.c

g10/iso7816.h

g10/kbnode.c

g10/keydb.c

g10/keydb.h

g10/keyedit.c

g10/keygen.c

g10/keyid.c

g10/keylist.c

g10/keyring.c

g10/keyring.h

g10/keyserver-internal.h

g10/keyserver.c

g10/main.h

g10/mainproc.c

g10/mdfilter.c

g10/misc.c

g10/mkdtemp.c

g10/openfile.c

g10/options.h

g10/options.skel

g10/packet.h

g10/parse-packet.c

g10/passphrase.c

g10/photoid.c

g10/photoid.h

g10/pipemode.c

g10/pkclist.c

g10/pkglue.c

g10/pkglue.h

g10/plaintext.c

g10/progress.c

g10/pubkey-enc.c

g10/pubring.asc

g10/revoke.c

g10/seckey-cert.c

g10/seskey.c

g10/sig-check.c

g10/sign.c

g10/signal.c

g10/skclist.c

g10/status.c

g10/status.h

g10/tdbdump.c

g10/tdbio.c

g10/tdbio.h

g10/textfilter.c

g10/trustdb.c

g10/trustdb.h

g10/verify.c

include

include/ChangeLog

include/_regex.h

include/cipher.h

include/distfiles

include/dynload.h

include/errors.h

include/host2net.h

include/http.h

include/i18n.h

include/keyserver.h

include/memory.h

include/mpi.h

include/types.h

include/util.h

include/zlib-riscos.h

intl

intl/ChangeLog

intl/Makefile.in

intl/VERSION

intl/bindtextdom.c

intl/config.charset

intl/dcgettext.c

intl/dcigettext.c

intl/dcngettext.c

intl/dgettext.c

intl/dngettext.c

intl/eval-plural.h

intl/explodename.c

intl/finddomain.c

intl/gettext.c

intl/gettextP.h

intl/gmo.h

intl/hash-string.h

intl/intl-compat.c

intl/l10nflist.c

intl/libgnuintl.h

intl/libgnuintl.h.in

intl/loadinfo.h

intl/loadmsgcat.c

intl/localcharset.c

intl/localcharset.h

intl/locale.alias

intl/localealias.c

intl/localename.c

intl/log.c

intl/ngettext.c

intl/os2compat.c

intl/os2compat.h

intl/osdep.c

intl/plural-exp.c

intl/plural-exp.h

intl/plural.c

intl/plural.y

intl/printf-args.c

intl/printf-args.h

intl/printf-parse.c

intl/printf-parse.h

intl/printf.c

intl/ref-add.sin

intl/ref-del.sin

intl/relocatable.c

intl/relocatable.h

intl/textdomain.c

intl/vasnprintf.c

intl/vasnprintf.h

intl/vasnwprintf.h

intl/wprintf-parse.h

intl/xsize.h

jnlib

jnlib/ChangeLog

jnlib/Makefile.am

jnlib/README

jnlib/argparse.c

jnlib/argparse.h

jnlib/dotlock.c

jnlib/dotlock.h

jnlib/libjnlib-config.h

jnlib/logging.c

jnlib/logging.h

jnlib/mischelp.h

jnlib/stringhelp.c

jnlib/stringhelp.h

jnlib/strlist.c

jnlib/strlist.h

jnlib/types.h

jnlib/utf8conv.c

jnlib/utf8conv.h

jnlib/w32-afunix.c

jnlib/w32-afunix.h

jnlib/w32-pth.c

jnlib/w32-pth.h

jnlib/xmalloc.c

jnlib/xmalloc.h

kbx/ChangeLog

kbx/Makefile.am

kbx/Manifest

kbx/kbxutil.c

kbx/keybox-blob.c

kbx/keybox-defs.h

kbx/keybox-dump.c

kbx/keybox-errors.c

kbx/keybox-file.c

kbx/keybox-init.c

kbx/keybox-openpgp.c

kbx/keybox-search-desc.h

kbx/keybox-search.c

kbx/keybox-update.c

kbx/keybox-util.c

kbx/keybox.h

kbx/mkerrors

keyserver

keyserver/ChangeLog

keyserver/Makefile.am

keyserver/gpgkeys_curl.c

keyserver/gpgkeys_finger.c

keyserver/gpgkeys_hkp.c

keyserver/gpgkeys_http.c

keyserver/gpgkeys_ldap.c

keyserver/gpgkeys_mailto.in

keyserver/gpgkeys_test.in

keyserver/ksutil.c

keyserver/ksutil.h

m4/ChangeLog

m4/Makefile.am

m4/Makefile.in

m4/codeset.m4

m4/gettext.m4

m4/glibc21.m4

m4/gpg-error.m4

m4/iconv.m4

m4/intdiv0.m4

m4/intmax.m4

m4/inttypes-pri.m4

m4/inttypes.m4

m4/inttypes_h.m4

m4/isc-posix.m4

m4/ksba.m4

m4/lcmessage.m4

m4/lib-ld.m4

m4/lib-link.m4

m4/lib-prefix.m4

m4/libassuan.m4

m4/libcurl.m4

m4/libgcrypt.m4

m4/libusb.m4

m4/longdouble.m4

m4/longlong.m4

m4/nls.m4

m4/po.m4

m4/printf-posix.m4

m4/progtest.m4

m4/readline.m4

m4/signed.m4

m4/size_max.m4

m4/stdint_h.m4

m4/uintmax_t.m4

m4/ulonglong.m4

m4/wchar_t.m4

m4/wint_t.m4

m4/xsize.m4

mpi/ChangeLog

mpi/Makefile.am

mpi/config.links

mpi/mpicoder.c

po/ChangeLog

po/LINGUAS

po/LINGUAS.contrib

po/Makefile.in.in

po/Makevars

po/POTFILES.in

po/Rules-quot

po/be.po

po/boldquot.sed

po/ca.po

po/cs.po

po/da.po

po/de.po

po/el.po

po/en@boldquot.gmo

po/en@boldquot.header

po/en@boldquot.po

po/en@quot.gmo

po/en@quot.header

po/en@quot.po

po/eo.po

po/es.po

po/et.po

po/fi.po

po/fr.po

po/gl.po

po/hu.po

po/id.po

po/insert-header.sin

po/it.po

po/ja.po

po/nl.po

po/pl.po

po/pt.po

po/pt_BR.po

po/quot.sed

po/remove-potcdate.sed

po/remove-potcdate.sin

po/ro.po

po/ru.po

po/sk.po

po/sv.po

po/tr.po

po/zh_CN.po

po/zh_TW.po

preset-passphrase.c

scd/ChangeLog

scd/Makefile.am

scd/apdu.c

scd/apdu.h

scd/app-common.h

scd/app-dinsig.c

scd/app-help.c

scd/app-nks.c

scd/app-openpgp.c

scd/app-p15.c

scd/app.c

scd/atr.c

scd/atr.h

scd/card-common.h

scd/card-dinsig.c

scd/card-p15.c

scd/card.c

scd/ccid-driver.c

scd/ccid-driver.h

scd/command.c

scd/iso7816.c

scd/iso7816.h

scd/pcsc-wrapper.c

scd/sc-copykeys.c

scd/scdaemon.c

scd/scdaemon.h

scd/tlv.c

scd/tlv.h

scripts

scripts/ChangeLog

scripts/autogen.sh

scripts/compile

scripts/config.guess

scripts/config.rpath

scripts/config.sub

scripts/depcomp

scripts/install-sh

scripts/mail-to-translators

scripts/mdate-sh

scripts/missing

scripts/mk-gpg-texi

scripts/mk-w32-dist

scripts/mkinstalldirs

scripts/texinfo.tex

scripts/w32installer.nsi

sm/ChangeLog

sm/Makefile.am

sm/base64.c

sm/call-agent.c

sm/call-dirmngr.c

sm/certchain.c

sm/certcheck.c

sm/certdump.c

sm/certlist.c

sm/certreqgen.c

sm/decrypt.c

sm/delete.c

sm/encrypt.c

sm/export.c

sm/fingerprint.c

sm/gpgsm.c

sm/gpgsm.h

sm/import.c

sm/keydb.c

sm/keydb.h

sm/keylist.c

sm/misc.c

sm/server.c

sm/sign.c

sm/verify.c

tests

tests/567064FE6D14A17B2D811ABB407728BC558AA455

tests/ChangeLog

tests/Makefile.am

tests/asschk.c

tests/cert_cci_sphinx_ca.pem

tests/cert_cci_test_wzs.pem

tests/cert_cci_test_zs.pem

tests/cert_cci_user02.pem

tests/cert_cci_user03.pem

tests/cert_cci_user04.pem

tests/cert_cci_user06.pem

tests/cert_cci_user07.pem

tests/cert_testpki_testpca.pem

tests/crl_testpki_testpca.pem

tests/extrasamples

tests/extrasamples/README

tests/extrasamples/dod-test9.p12

tests/inittests

tests/key_g10code_pete1.pem

tests/key_g10code_theo1.pem

tests/pkits

tests/pkits/ChangeLog

tests/pkits/Makefile.am

tests/pkits/PKITS_data.tar.bz2

tests/pkits/README

tests/pkits/common.sh

tests/pkits/import-all-certs

tests/pkits/import-all-certs.data

tests/pkits/inittests

tests/pkits/runtest

tests/pkits/validate-all-certs

tests/runtest

tests/samplekeys

tests/samplekeys/32100C27173EF6E9C4E9A25D3D69F86D37A4F939.key

tests/samplekeys/8D6D17B4B94BBE8304926C016D2C5C7805EB6705.key

tests/samplekeys/README

tests/samplekeys/cert_g10code_enconly_1.pem

tests/samplekeys/cert_g10code_pete1.pem

tests/samplekeys/cert_g10code_test1.pem

tests/samplekeys/cert_g10code_test_ca.pem

tests/samplekeys/cert_g10code_theo1.pem

tests/samplekeys/cryptlib-key.p12

tests/samplekeys/opensc-test.p12

tests/samplekeys/ossl-rentec-user.pem

tests/samplekeys/ov-server.p12

tests/samplekeys/ov-user.p12

tests/samplekeys/webdeca.der

tests/samplekeys/webderoot.der

tests/samplemsgs

tests/samplemsgs/webde-buenemann-signed.cms

tests/skey_g10code_test1.pem

tests/sm-sign+verify

tests/sm-verify

tests/text-1.dsig.pem

tests/text-1.osig-bad.pem

tests/text-1.osig-badusage.pem

tests/text-1.osig.pem

tests/text-1.txt

tests/text-2.osig-bad.pem

tests/text-2.osig.pem

tests/text-2.txt

tests/text-3.txt

tools

tools/ChangeLog

tools/Makefile.am

tools/Manifest

tools/README.gpgconf

tools/addgnupghome

tools/bftest.c

tools/der-to-pem

tools/gpgconf-comp.c

tools/gpgconf.c

tools/gpgconf.h

tools/gpgparsemail.c

tools/gpgsm-gencert.sh

tools/mpicalc.c

tools/no-libgcrypt.c

tools/pgpgroup-to-gpggroup

tools/rfc822parse.c

tools/rfc822parse.h

tools/shmtest.c

tools/watchgnupg.c

util

util/ChangeLog

util/dotlock.c

util/errors.c

util/fileutil.c

util/http.c

util/iobuf.c

util/logger.c

util/secmem.c

util/simple-gettext.c

util/srv.h

util/strgutil.c

util/ttyio.c

Show diffs side-by-side

added added

removed removed

jnlib/utf8conv.c

/* utf8conv.c - UTF8 character set conversion

* 2003 Free Software Foundation, Inc.

* This file is part of GnuPG.

* GnuPG is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* the Free Software Foundation; either version 2 of the License, or

* (at your option) any later version.

* GnuPG is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with this program; if not, write to the Free Software

* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

#include <config.h>

#include <stdlib.h>

#include <string.h>

#include <stdarg.h>

#include <ctype.h>

#ifdef HAVE_LANGINFO_CODESET

#include <langinfo.h>

#endif

#include "libjnlib-config.h"

#include "stringhelp.h"

#include "utf8conv.h"

/* Unicode code points for the upper half (bytes 0x80..0xff) of the
 * KOI8-R character set.  Entry I describes native byte 0x80 + I; the
 * trailing comment on each row gives the native byte range it covers. */
static ushort koi8_unicode[128] = {
  0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524, /* 80-87 */
  0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, /* 88-8f */
  0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248, /* 90-97 */
  0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7, /* 98-9f */
  0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, /* a0-a7 */
  0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, /* a8-af */
  0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, /* b0-b7 */
  0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9, /* b8-bf */
  0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, /* c0-c7 */
  0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, /* c8-cf */
  0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, /* d0-d7 */
  0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a, /* d8-df */
  0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, /* e0-e7 */
  0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, /* e8-ef */
  0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, /* f0-f7 */
  0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a  /* f8-ff */
};

/* Unicode code points for the upper half (bytes 0x80..0xff) of the
 * ISO-8859-2 (Latin-2) character set.  Entry I describes native byte
 * 0x80 + I; the first 32 entries map to themselves.  The trailing
 * comment on each row gives the native byte range it covers. */
static ushort latin2_unicode[128] = {
  0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 80-87 */
  0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, /* 88-8f */
  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 90-97 */
  0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, /* 98-9f */
  0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, /* a0-a7 */
  0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B, /* a8-af */
  0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, /* b0-b7 */
  0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C, /* b8-bf */
  0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, /* c0-c7 */
  0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E, /* c8-cf */
  0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, /* d0-d7 */
  0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF, /* d8-df */
  0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, /* e0-e7 */
  0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F, /* e8-ef */
  0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, /* f0-f7 */
  0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9  /* f8-ff */
};

/* Canonical name of the currently selected native character set, as
 * returned by get_native_charset.  */
static const char *active_charset_name = "iso-8859-1";

/* Translation table for the upper half of the selected character set,
 * or NULL when no table is in use (Latin-1 and UTF-8).  */
static ushort *active_charset = NULL;

/* Set to 1 when the native character set is UTF-8 itself, so that
 * strings need no conversion.  */
static int no_translation = 0;

int

set_native_charset (const char *newset)

{

if (!newset)

#ifdef HAVE_LANGINFO_CODESET

newset = nl_langinfo (CODESET);

#else

newset = "8859-1";

#endif

if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))

{

newset += 3;

if (*newset == '-' || *newset == '_')

newset++;

}

if (!*newset

|| !ascii_strcasecmp (newset, "8859-1")

|| !ascii_strcasecmp (newset, "8859-15"))

{

100

active_charset_name = "iso-8859-1";

101

no_translation = 0;

102

active_charset = NULL;

103

}

104

else if (!ascii_strcasecmp (newset, "8859-2"))

105

{

106

active_charset_name = "iso-8859-2";

107

no_translation = 0;

108

active_charset = latin2_unicode;

109

}

110

else if (!ascii_strcasecmp (newset, "koi8-r"))

111

{

112

active_charset_name = "koi8-r";

113

no_translation = 0;

114

active_charset = koi8_unicode;

115

}

116

else if (!ascii_strcasecmp (newset, "utf8")

117

|| !ascii_strcasecmp (newset, "utf-8"))

118

{

119

active_charset_name = "utf-8";

120

no_translation = 1;

121

active_charset = NULL;

122

}

123

else

124

return -1;

125

return 0;

126

}

127

128

const char *

129

get_native_charset ()

130

{

131

return active_charset_name;

132

}

133

134

/****************

135

* Convert string, which is in native encoding to UTF8 and return the

136

* new allocated UTF8 string.

137

138

char *

139

native_to_utf8 (const char *string)

140

{

141

const byte *s;

142

char *buffer;

143

byte *p;

144

size_t length = 0;

145

146

if (no_translation)

147

{

148

buffer = jnlib_xstrdup (string);

149

}

150

else if (active_charset)

151

{

152

for (s = string; *s; s++)

153

{

154

length++;

155

if (*s & 0x80)

156

length += 2; /* we may need 3 bytes */

157

}

158

buffer = jnlib_xmalloc (length + 1);

159

for (p = buffer, s = string; *s; s++)

160

{

161

if ((*s & 0x80))

162

{

163

ushort val = active_charset[*s & 0x7f];

164

if (val < 0x0800)

165

{

166

*p++ = 0xc0 | ((val >> 6) & 0x1f);

167

*p++ = 0x80 | (val & 0x3f);

168

}

169

else

170

{

171

*p++ = 0xe0 | ((val >> 12) & 0x0f);

172

*p++ = 0x80 | ((val >> 6) & 0x3f);

173

*p++ = 0x80 | (val & 0x3f);

174

}

175

}

176

else

177

*p++ = *s;

178

}

179

*p = 0;

180

}

181

else

182

{

183

for (s = string; *s; s++)

184

{

185

length++;

186

if (*s & 0x80)

187

length++;

188

}

189

buffer = jnlib_xmalloc (length + 1);

190

for (p = buffer, s = string; *s; s++)

191

{

192

if (*s & 0x80)

193

{

194

*p++ = 0xc0 | ((*s >> 6) & 3);

195

*p++ = 0x80 | (*s & 0x3f);

196

}

197

else

198

*p++ = *s;

199

}

200

*p = 0;

201

}

202

return buffer;

203

}

204

205

206

/* Convert string, which is in UTF8 to native encoding. Replace

207

* illegal encodings by some "\xnn" and quote all control

208

* characters. A character with value DELIM will always be quoted, it

209

* must be a vanilla ASCII character. */

210

char *

211

utf8_to_native (const char *string, size_t length, int delim)

212

{

213

int nleft;

214

int i;

215

byte encbuf[8];

216

int encidx;

217

const byte *s;

218

size_t n;

219

byte *buffer = NULL, *p = NULL;

220

unsigned long val = 0;

221

size_t slen;

222

int resync = 0;

223

224

/* 1. pass (p==NULL): count the extended utf-8 characters */

225

/* 2. pass (p!=NULL): create string */

226

for (;;)

227

{

228

for (slen = length, nleft = encidx = 0, n = 0, s = string; slen;

229

s++, slen--)

230

{

231

if (resync)

232

{

233

if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))

234

{

235

/* still invalid */

236

if (p)

237

{

238

sprintf (p, "\\x%02x", *s);

239

p += 4;

240

}

241

n += 4;

242

continue;

243

}

244

resync = 0;

245

}

246

if (!nleft)

247

{

248

if (!(*s & 0x80))

249

{ /* plain ascii */

250

if (*s < 0x20 || *s == 0x7f || *s == delim ||

251

(delim && *s == '\\'))

252

{

253

n++;

254

if (p)

255

*p++ = '\\';

256

switch (*s)

257

{

258

case '\n':

259

n++;

260

if (p)

261

*p++ = 'n';

262

break;

263

case '\r':

264

n++;

265

if (p)

266

*p++ = 'r';

267

break;

268

case '\f':

269

n++;

270

if (p)

271

*p++ = 'f';

272

break;

273

case '\v':

274

n++;

275

if (p)

276

*p++ = 'v';

277

break;

278

case '\b':

279

n++;

280

if (p)

281

*p++ = 'b';

282

break;

283

case 0:

284

n++;

285

if (p)

286

*p++ = '0';

287

break;

288

default:

289

n += 3;

290

if (p)

291

{

292

sprintf (p, "x%02x", *s);

293

p += 3;

294

}

295

break;

296

}

297

}

298

else

299

{

300

if (p)

301

*p++ = *s;

302

n++;

303

}

304

}

305

else if ((*s & 0xe0) == 0xc0)

306

{ /* 110x xxxx */

307

val = *s & 0x1f;

308

nleft = 1;

309

encidx = 0;

310

encbuf[encidx++] = *s;

311

}

312

else if ((*s & 0xf0) == 0xe0)

313

{ /* 1110 xxxx */

314

val = *s & 0x0f;

315

nleft = 2;

316

encidx = 0;

317

encbuf[encidx++] = *s;

318

}

319

else if ((*s & 0xf8) == 0xf0)

320

{ /* 1111 0xxx */

321

val = *s & 0x07;

322

nleft = 3;

323

encidx = 0;

324

encbuf[encidx++] = *s;

325

}

326

else if ((*s & 0xfc) == 0xf8)

327

{ /* 1111 10xx */

328

val = *s & 0x03;

329

nleft = 4;

330

encidx = 0;

331

encbuf[encidx++] = *s;

332

}

333

else if ((*s & 0xfe) == 0xfc)

334

{ /* 1111 110x */

335

val = *s & 0x01;

336

nleft = 5;

337

encidx = 0;

338

encbuf[encidx++] = *s;

339

}

340

else

341

{ /* invalid encoding: print as \xnn */

342

if (p)

343

{

344

sprintf (p, "\\x%02x", *s);

345

p += 4;

346

}

347

n += 4;

348

resync = 1;

349

}

350

}

351

else if (*s < 0x80 || *s >= 0xc0)

352

{ /* invalid */

353

if (p)

354

{

355

for (i = 0; i < encidx; i++)

356

{

357

sprintf (p, "\\x%02x", encbuf[i]);

358

p += 4;

359

}

360

sprintf (p, "\\x%02x", *s);

361

p += 4;

362

}

363

n += 4 + 4 * encidx;

364

nleft = 0;

365

encidx = 0;

366

resync = 1;

367

}

368

else

369

{

370

encbuf[encidx++] = *s;

371

val <<= 6;

372

val |= *s & 0x3f;

373

if (!--nleft)

374

{ /* ready */

375

if (no_translation)

376

{

377

if (p)

378

{

379

for (i = 0; i < encidx; i++)

380

*p++ = encbuf[i];

381

}

382

n += encidx;

383

encidx = 0;

384

}

385

else if (active_charset)

386

{ /* table lookup */

387

for (i = 0; i < 128; i++)

388

{

389

if (active_charset[i] == val)

390

break;

391

}

392

if (i < 128)

393

{ /* we can print this one */

394

if (p)

395

*p++ = i + 128;

396

n++;

397

}

398

else

399

{ /* we do not have a translation: print utf8 */

400

if (p)

401

{

402

for (i = 0; i < encidx; i++)

403

{

404

sprintf (p, "\\x%02x", encbuf[i]);

405

p += 4;

406

}

407

}

408

n += encidx * 4;

409

encidx = 0;

410

}

411

}

412

else

413

{ /* native set */

414

if (val >= 0x80 && val < 256)

415

{

416

n++; /* we can simply print this character */

417

if (p)

418

*p++ = val;

419

}

420

else

421

{ /* we do not have a translation: print utf8 */

422

if (p)

423

{

424

for (i = 0; i < encidx; i++)

425

{

426

sprintf (p, "\\x%02x", encbuf[i]);

427

p += 4;

428

}

429

}

430

n += encidx * 4;

431

encidx = 0;

432

}

433

}

434

}

435

436

}

437

}

438

if (!buffer)

439

{ /* allocate the buffer after the first pass */

440

buffer = p = jnlib_xmalloc (n + 1);

441

}

442

else

443

{

444

*p = 0; /* make a string */

445

return buffer;

446

}

447

}

448

}

Older »