~vcs-imports/libiconv/trunk

Viewing changes to tests/test-bom-state.c

Committer: Bruno Haible
Date: 2024-12-15 12:23:08 UTC
Revision ID: git-v1:8d618a87265040dc882b451e39c6a39e610395be

Prepare for version 1.18.

* configure.ac: Bump version number to 1.18.
* include/iconv.h.in (_LIBICONV_VERSION): Likewise.
* lib/Makefile.in (LIBICONV_VERSION_INFO): Bump to 9:0:7.
* src/iconv.c (print_version): Update copyright year.
* windows/iconv.rc: Likewise.
* windows/libiconv.rc: Likewise.
* README: Update download link.

files added:
po/insert-header.sed

po/remove-potcdate.sed

tests/test-bom-state.c

tests/test-discard.c

files removed:
po/insert-header.sin

po/remove-potcdate.sin

files modified:
.gitignore

COPYING.LIB

ChangeLog

INSTALL.windows

Makefile.devel

NEWS

NOTES

README

autogen.sh

build-aux/ltmain.sh

configure.ac

gnulib-local/lib/xmalloc.c

gnulib-local/m4/alloca.m4

include/iconv.h.in

lib/Makefile.in

lib/converters.h

lib/iconv.c

lib/iconv_open1.h

lib/iconv_open2.h

lib/iso2022_jp2.h

lib/iso2022_jpms.h

lib/loop_unicode.h

lib/loop_wchar.h

lib/translit.def

lib/ucs2.h

lib/ucs4.h

lib/ucs4be.h

lib/ucs4le.h

lib/utf16.h

lib/utf32.h

lib/utf32be.h

lib/utf32le.h

libcharset/COPYING.LIB

libcharset/ChangeLog

libcharset/autogen.sh

libcharset/build-aux/ltmain.sh

libcharset/m4/libtool.m4

libcharset/m4/ltoptions.m4

libcharset/m4/ltsugar.m4

libcharset/m4/ltversion.m4

libcharset/m4/lt~obsolete.m4

m4/libtool.m4

m4/ltoptions.m4

m4/ltsugar.m4

m4/ltversion.m4

m4/lt~obsolete.m4

man/iconv.1

man/iconv.3

man/iconv_close.3

man/iconv_open.3

man/iconvctl.3

po/Makefile.in.in

po/Rules-quot

po/boldquot.sed

po/en@boldquot.header

po/en@quot.header

po/quot.sed

src/iconv.c

tests/Makefile.in

windows/iconv.rc

windows/libiconv.rc

Show diffs side-by-side

added added

removed removed

tests/test-bom-state.c

/* This file is part of the GNU LIBICONV Library.

The GNU LIBICONV Library is free software; you can redistribute it

and/or modify it under the terms of the GNU Lesser General Public

License as published by the Free Software Foundation; either version 2.1

of the License, or (at your option) any later version.

The GNU LIBICONV Library is distributed in the hope that it will be

useful, but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public

License along with the GNU LIBICONV Library; see the file COPYING.LIB.

If not, see <https://www.gnu.org/licenses/>. */

#include "config.h"

#include <stdlib.h>

#include <string.h>

#include <iconv.h>

#include <errno.h>

/* This test checks that iconv(cd,NULL,NULL,...) does not forget about

the byte-order state in conversions from UCS-2, UCS-4, UTF-16, UTF-32.

The POSIX specification

<https://pubs.opengroup.org/onlinepubs/9799919799/functions/iconv.html>

is clear that iconv(cd,NULL,NULL,...) has an effect for state-dependent

encodings only. The manual page

<https://www.kernel.org/doc/man-pages/online/pages/man3/iconv.3.html>

is not so clear about it. But Ulrich Drepper states it correctly in

<https://bugzilla.redhat.com/show_bug.cgi?id=165368>:

"Flushing using iconv() only resets the shift state. This is needed

for stateful encodings with states where the caller wants a converted

string to end in the initial state. The BOM recognition has nothing

to do with shift states. Once the byte order is determined this is

a property which stays with the iconv_t descriptor for its lifetime."

Based on a bug report from Tomas Kalibera <tomas.kalibera@gmail.com> in

<https://lists.gnu.org/archive/html/bug-gnu-libiconv/2024-12/msg00000.html>. */

/* Convert INPUT (INPUT_SIZE bytes, encoded in FROMCODE) to UTF-8 using two
   iconv() calls separated by a reset call iconv(cd,NULL,NULL,NULL,NULL).
   Each call gets a 3-byte output buffer, i.e. room for exactly one of the
   expected UTF-8 sequences.
   Calls abort() if the descriptor cannot be opened, if a call does not
   return the expected status, or if the produced bytes are not U+2526
   (first call) and U+2629 (second call).  */
static void
test_one_input (const char *fromcode, const char *input, size_t input_size)
{
  char first[3];
  char second[3];

  iconv_t conv = iconv_open ("UTF-8", fromcode);
  if (conv == (iconv_t)(-1))
    abort ();

  /* iconv() takes a non-const 'char **'; the cast only drops the
     qualifier so that the address of the cursor can be passed.  */
  char *src = (char *) input;
  size_t src_left = input_size;
  char *dst;
  size_t dst_left;
  size_t rc;

  /* First call: the output buffer must fill up completely, and the call
     must fail with E2BIG.  */
  dst = first;
  dst_left = sizeof (first);
  rc = iconv (conv, &src, &src_left, &dst, &dst_left);
  if (rc != (size_t)(-1) || errno != E2BIG || dst_left != 0)
    abort ();
  if (memcmp (first, "\xe2\x94\xa6", 3) != 0) /* expected: U+2526 */
    abort ();

  /* Reset the shift state.  The second expected value below can only be
     produced if the remaining input is still read with the same byte
     order as before this call.  */
  rc = iconv (conv, NULL, NULL, NULL, NULL);
  if (rc != 0)
    abort ();

  /* Second call: continue from where the first call stopped, into a
     fresh buffer; this time the call must succeed and fill the buffer.  */
  dst = second;
  dst_left = sizeof (second);
  rc = iconv (conv, &src, &src_left, &dst, &dst_left);
  if (rc != 0 || dst_left != 0)
    abort ();
  if (memcmp (second, "\xe2\x98\xa9", 3) != 0) /* expected: U+2629 */
    abort ();

  iconv_close (conv);
}

/* Run test_one_input for FROMCODE twice: first on BE_INPUT, then on
   LE_INPUT.  Both inputs are INPUT_SIZE bytes long.  */
static void
test_both_inputs (const char *fromcode,
                  const char *be_input, const char *le_input,
                  size_t input_size)
{
  const char *variants[2];

  variants[0] = be_input;
  variants[1] = le_input;

  for (size_t i = 0; i < 2; i++)
    test_one_input (fromcode, variants[i], input_size);
}

/* Test driver.  Feeds a big-endian and a little-endian input, each
   starting with a byte-order mark and followed by U+2526 and U+2629, to
   test_both_inputs, once for the 16-bit encodings and once for the 32-bit
   encodings.  Returns 0 when all checks pass; a failing check calls
   abort() inside test_one_input.

   The UCS-2 and UCS-4 runs are compiled only when _LIBICONV_VERSION is
   defined.  NOTE(review): presumably because only GNU libiconv is expected
   to honour a BOM for these encodings -- confirm.  */
int
main (void)
{
  /* 16-bit code units: BOM, U+2526, U+2629.  */
  {
    static const char be_input[] = "\xfe\xff\x25\x26\x26\x29";
    static const char le_input[] = "\xff\xfe\x26\x25\x29\x26";
    /* Length without the implicit terminating NUL (6 bytes); both inputs
       have the same length.  */
    const size_t input_size = sizeof (be_input) - 1;

#ifdef _LIBICONV_VERSION
    test_both_inputs ("UCS-2", be_input, le_input, input_size);
#endif
    test_both_inputs ("UTF-16", be_input, le_input, input_size);
  }

  /* 32-bit code units: BOM, U+2526, U+2629.  */
  {
    static const char be_input[] =
      "\x00\x00\xfe\xff\x00\x00\x25\x26\x00\x00\x26\x29";
    static const char le_input[] =
      "\xff\xfe\x00\x00\x26\x25\x00\x00\x29\x26\x00\x00";
    /* Length without the implicit terminating NUL (12 bytes); sizeof
       counts the embedded \x00 bytes, unlike strlen.  */
    const size_t input_size = sizeof (be_input) - 1;

#ifdef _LIBICONV_VERSION
    test_both_inputs ("UCS-4", be_input, le_input, input_size);
#endif
    test_both_inputs ("UTF-32", be_input, le_input, input_size);
  }

  return 0;
}

Older »