1
/* Copyright (C) 2024 Free Software Foundation, Inc.
2
This file is part of the GNU LIBICONV Library.
4
The GNU LIBICONV Library is free software; you can redistribute it
5
and/or modify it under the terms of the GNU Lesser General Public
6
License as published by the Free Software Foundation; either version 2.1
7
of the License, or (at your option) any later version.
9
The GNU LIBICONV Library is distributed in the hope that it will be
10
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
Lesser General Public License for more details.
14
You should have received a copy of the GNU Lesser General Public
15
License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16
If not, see <https://www.gnu.org/licenses/>. */
25
/* This test checks that iconv(cd,NULL,NULL,...) does not forget about
26
the byte-order state in conversions from UCS-2, UCS-4, UTF-16, UTF-32.
28
The POSIX specification
29
<https://pubs.opengroup.org/onlinepubs/9799919799/functions/iconv.html>
30
is clear that iconv(cd,NULL,NULL,...) has an effect for state-dependent
31
encodings only. The manual page
32
<https://www.kernel.org/doc/man-pages/online/pages/man3/iconv.3.html>
33
is not so clear about it. But Ulrich Drepper states it correctly in
34
<https://bugzilla.redhat.com/show_bug.cgi?id=165368>:
35
"Flushing using iconv() only resets the shift state. This is needed
36
for stateful encodings with states where the caller wants a converted
37
string to end in the initial state. The BOM recognition has nothing
38
to do with shift states. Once the byte order is determined this is
39
a property which stays with the iconv_t descriptor for its lifetime."
41
Based on a bug report from Tomas Kalibera <tomas.kalibera@gmail.com> in
42
<https://lists.gnu.org/archive/html/bug-gnu-libiconv/2024-12/msg00000.html>.
45
/* Convert INPUT (INPUT_SIZE bytes in the encoding FROMCODE) to UTF-8 with
   two iconv() calls separated by a reset call, and check each result:
   the first call must produce the bytes E2 94 A6 (U+2526) and the second
   the bytes E2 98 A9 (U+2629).
   NOTE(review): the declarations of outbuf1 and outbuf2 and the statements
   executed when a check fails are not visible in this excerpt.  */
static void test_one_input (const char *fromcode,
46
const char *input, size_t input_size)
51
/* Open a descriptor that converts from FROMCODE to UTF-8.  */
iconv_t cd = iconv_open ("UTF-8", fromcode);
52
/* (iconv_t)(-1) is the failure value of iconv_open().  */
if (cd == (iconv_t)(-1))
55
/* Convert the first character.  outbuf1 is the entire output area for
   this call, so the call is expected to stop as soon as it is full.  */
56
/* iconv() takes a non-const char **, hence the cast.  */
char *inbuf = (char *) input;
57
size_t inbytesleft = input_size;
58
char *outbuf = outbuf1;
59
size_t outbytesleft = sizeof (outbuf1);
60
size_t ret = iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
61
/* Expect failure with E2BIG and outbuf1 completely filled.  */
if (!(ret == (size_t)(-1) && errno == E2BIG && outbytesleft == 0))
63
if (!(memcmp (outbuf1, "\xe2\x94\xa6", 3) == 0)) /* should be U+2526 */
66
/* Reset the shift state.  This is the call under test: the conversion
   below only yields the expected bytes if the byte order determined
   during the first call is still in effect afterwards.  */
67
ret = iconv (cd, NULL, NULL, NULL, NULL);
71
/* Convert the second character, continuing from where the first call
   left inbuf and inbytesleft.
   NOTE(review): no statement pointing outbuf at outbuf2 is visible in
   this excerpt, although the check below reads outbuf2 -- confirm that
   it exists.  */
73
outbytesleft = sizeof (outbuf2);
74
ret = iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
75
/* Expect success with outbuf2 completely filled.  */
if (!(ret == 0 && outbytesleft == 0))
77
if (!(memcmp (outbuf2, "\xe2\x98\xa9", 3) == 0)) /* should be U+2629 */
83
/* Run test_one_input() for FROMCODE on BE_INPUT and then on LE_INPUT,
   passing the same input_size for both.  The callers visible in this file
   pass the same text once in big-endian and once in little-endian byte
   order.
   NOTE(review): the rest of the parameter list (which presumably declares
   input_size) is not visible in this excerpt.  */
static void test_both_inputs (const char *fromcode,
84
const char *be_input, const char *le_input,
87
test_one_input (fromcode, be_input, input_size);
88
test_one_input (fromcode, le_input, input_size);
94
/* 16-bit input: the byte-order mark U+FEFF followed by U+2526 and U+2629,
   two bytes per character, in big-endian and in little-endian order.  */
static const char be_input[] = "\xfe\xff\x25\x26\x26\x29";
95
static const char le_input[] = "\xff\xfe\x26\x25\x29\x26";
96
/* The UCS-2 case is compiled only when _LIBICONV_VERSION is defined.
   NOTE(review): presumably that macro identifies GNU libiconv's <iconv.h>;
   the matching #endif is on a line not visible here -- confirm which calls
   it guards.  */
#ifdef _LIBICONV_VERSION
97
/* The size 6 excludes the string literal's terminating NUL.  */
test_both_inputs ("UCS-2", be_input, le_input, 6);
99
test_both_inputs ("UTF-16", be_input, le_input, 6);
102
/* 32-bit input: the same three characters, four bytes per character.
   NOTE(review): these names repeat the declarations above, so this is
   presumably a separate scope whose braces are not visible here.  */
static const char be_input[] =
103
"\x00\x00\xfe\xff\x00\x00\x25\x26\x00\x00\x26\x29";
104
static const char le_input[] =
105
"\xff\xfe\x00\x00\x26\x25\x00\x00\x29\x26\x00\x00";
106
/* As for UCS-2 above, the UCS-4 case depends on _LIBICONV_VERSION.  */
#ifdef _LIBICONV_VERSION
107
/* The size 12 excludes the string literal's terminating NUL.  */
test_both_inputs ("UCS-4", be_input, le_input, 12);
109
test_both_inputs ("UTF-32", be_input, le_input, 12);