2
Unix SMB/CIFS implementation.
4
local testing of iconv routines. This tests the system iconv code against
5
the built-in iconv code
7
Copyright (C) Andrew Tridgell 2004
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 3 of the License, or
12
(at your option) any later version.
14
This program is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
GNU General Public License for more details.
19
You should have received a copy of the GNU General Public License
20
along with this program. If not, see <http://www.gnu.org/licenses/>.
24
#include "torture/torture.h"
25
#include "system/iconv.h"
26
#include "system/time.h"
27
#include "libcli/raw/libcliraw.h"
28
#include "param/param.h"
29
#include "torture/util.h"
33
/*
  check the preconditions for the tests that compare the system iconv
  with the built-in one

  torture_skip() is invoked when lp_parm_bool() reports the
  "iconv" / "native" setting as false, or when the system iconv_open()
  cannot create a converter that produces UTF-16LE from UCS-4LE or from
  CP850.

  The return statements are not visible in this excerpt; callers in this
  file use the result as a boolean condition.
*/
static bool iconv_untestable(struct torture_context *tctx)
37
if (!lp_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true))
38
torture_skip(tctx, "system iconv disabled - skipping test");
40
/* probe the system library: target UTF-16LE, source UCS-4LE */
cd = iconv_open("UTF-16LE", "UCS-4LE");
41
if (cd == (iconv_t)-1)
42
/* NOTE(review): iconv_open() takes (tocode, fromcode), so the probe
   above is UCS-4LE -> UTF-16LE; the message states the opposite
   direction - confirm which wording is intended */
torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
45
/* probe the system library: target UTF-16LE, source CP850 */
cd = iconv_open("UTF-16LE", "CP850");
46
if (cd == (iconv_t)-1)
47
/* NOTE(review): unlike the two skip messages above, this one ends in
   a newline */
torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
54
generate a UTF-16LE buffer for a given unicode codepoint
56
/*
  encode a single codepoint as UTF-16LE with the system iconv

  codepoint: value to encode; it is split into four bytes, least
             significant byte first, to form the UCS-4LE input
  buf:       output buffer handed to iconv()
  size:      not touched in the lines visible here - presumably receives
             the number of bytes written to buf; TODO confirm

  The return value is not visible in this excerpt; test_first_1m()
  treats a non-zero result as a failure.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
57
char *buf, size_t *size)
62
size_t size_in, size_out, ret;
64
/* converter with target UTF-16LE and source UCS-4LE */
cd = iconv_open("UTF-16LE", "UCS-4LE");
65
if (cd == (iconv_t)-1) {
71
/* serialise the codepoint as four little-endian bytes */
in[0] = codepoint & 0xFF;
72
in[1] = (codepoint>>8) & 0xFF;
73
in[2] = (codepoint>>16) & 0xFF;
74
in[3] = (codepoint>>24) & 0xFF;
80
ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out);
89
work out the unicode codepoint of the first character in the buffer, which is encoded in the given charset
91
/*
  convert the start of buf from "charset" to UCS-4LE with the system
  iconv and return the first four output bytes combined as a
  little-endian value

  buf:     input bytes, passed directly to iconv()
  size:    presumably the number of valid bytes in buf (its use is not
           visible in this excerpt) - TODO confirm
  charset: source charset name given to iconv_open()
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
96
size_t size_out, size_in, ret;
98
cd = iconv_open("UCS-4LE", charset);
101
ptr_out = (char *)out;
102
size_out = sizeof(out);
103
/* clear the output first so bytes iconv() does not write read as zero */
memset(out, 0, sizeof(out));
105
ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out);
109
/* NOTE(review): if out[] has a narrow unsigned element type, out[3]<<24
   is evaluated as int and overflows for out[3] >= 0x80 - confirm the
   declaration of out */
return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24);
113
display a buffer with name prefix
115
/*
  debugging helper: print every one of the "size" bytes of buf to stdout
  as two lower-case hex digits followed by a space

  name: label for the dump; where it is printed is not visible in this
        excerpt
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
119
for (i=0;i<size;i++) {
120
printf("%02x ", buf[i]);
126
given a UTF-16LE buffer, test the system and built-in iconv code to
127
make sure they do exactly the same thing in converting the buffer to
128
"charset", then convert it back again and ensure we get the same buffer back
131
/*
  compare the built-in smb_iconv() with the system iconv() on one buffer

  test:    torture context; also used as the talloc parent for messages
           and for the smb_iconv handles
  inbuf:   input bytes, treated as UTF-16LE
  size:    number of bytes in inbuf
  charset: charset to convert to and back from

  Visible steps:
   - open a system converter (target charset, source UTF-16LE) and two
     smb_iconv handles: cd2 for the same direction and cd3 for the
     reverse direction
   - convert inbuf with smb_iconv() into buf1 and with iconv() into buf2
   - require matching return values, errno values, remaining output
     space, remaining input and output bytes
   - convert buf1 back with cd3 into buf3 and, for charsets whose name
     starts with "UTF", require buf3 to equal the consumed part of inbuf

  Differences that involve a codepoint >= (1<<20) are special-cased; the
  bodies of those branches are not visible in this excerpt.
*/
static bool test_buffer(struct torture_context *test,
132
uint8_t *inbuf, size_t size, const char *charset)
134
uint8_t buf1[1000], buf2[1000], buf3[1000];
135
size_t outsize1, outsize2, outsize3;
138
size_t size_in1, size_in2, size_in3;
139
size_t ret1, ret2, ret3, len1, len2;
142
/* static: the handles and the charset name persist across calls */
static smb_iconv_t cd2, cd3;
143
static const char *last_charset;
145
/* NOTE(review): this condition does not compare charset with
   last_charset, so handles left over from a previous call appear to be
   closed on every call - confirm that this is intended */
if (cd && last_charset) {
147
smb_iconv_close(cd2);
148
smb_iconv_close(cd3);
153
cd = iconv_open(charset, "UTF-16LE");
154
if (cd == (iconv_t)-1) {
156
/* NOTE(review): the iconv_open() call above converts from UTF-16LE to
   charset; the message below words it the other way round */
talloc_asprintf(test,
157
"failed to open %s to UTF-16LE",
160
cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
161
cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
162
last_charset = charset;
165
/* internal convert to charset - placing result in buf1 */
ptr_in = (const char *)inbuf;
166
ptr_out = (char *)buf1;
167
outsize1 = sizeof(buf1);
169
memset(ptr_out, 0, outsize1);
171
ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
173
/* system convert to charset - placing result in buf2 */
176
ptr_in = (const char *)inbuf;
177
ptr_out = (char *)buf2;
178
outsize2 = sizeof(buf2);
180
memset(ptr_out, 0, outsize2);
182
ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
184
/* number of output bytes each implementation produced */
len1 = sizeof(buf1) - outsize1;
187
len2 = sizeof(buf2) - outsize2;
188
/* codepoints above 1M are not interesting for now */
190
memcmp(buf1, buf2, len1) == 0 &&
192
get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
193
memcmp(buf1, buf2, len2) == 0 &&
197
get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
198
torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
202
if (errno1 != errno2) {
204
/* dump the input each implementation left unconsumed */
show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
205
show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
206
torture_fail(test, talloc_asprintf(test,
207
errno1, strerror(errno1),
209
errno2, strerror(errno2)));
210
torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
213
torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
215
memcmp(buf1, buf2, len1) != 0) {
218
torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
219
show_buf(" IN1:", inbuf, size-size_in1);
220
show_buf(" IN2:", inbuf, size-size_in2);
221
show_buf("OUT1:", buf1, len1);
222
show_buf("OUT2:", buf2, len2);
223
/* when one output is a prefix of the other, report the first extra
   codepoint of the longer one */
if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
224
torture_comment(test, "next codepoint is %u",
225
get_codepoint((char *)(buf2+len1), len2-len1, charset));
226
if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
228
torture_comment(test, "next codepoint is %u",
229
get_codepoint((char *)(buf1+len2),len1-len2, charset));
230
torture_fail(test, "failed");
233
/* convert back to UTF-16, putting result in buf3 */
236
/* from here on size is the number of input bytes smb_iconv() consumed */
size = size - size_in1;
237
ptr_in = (const char *)buf1;
238
ptr_out = (char *)buf3;
239
outsize3 = sizeof(buf3);
241
memset(ptr_out, 0, outsize3);
243
ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
244
/* we only internally support the first 1M codepoints */
246
if (outsize3 != sizeof(buf3) - size &&
247
get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
248
size - (sizeof(buf3) - outsize3),
249
"UTF-16LE") >= (1<<20)) {
250
torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
254
"pull failed - %s", strerror(errno)));
255
if (strncmp(charset, "UTF", 3) != 0) {
257
/* don't expect perfect mappings for non UTF charsets */
258
torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
263
if (memcmp(buf3, inbuf, size) != 0) {
266
torture_comment(test, "pull bytes mismatch:");
267
show_buf("inbuf", inbuf, size);
268
show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
269
torture_comment(test, "next codepoint is %u\n",
270
get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
271
size - (sizeof(buf3) - outsize3),
272
torture_fail(test, "");
282
test the push_codepoint() and next_codepoint() functions for a given codepoint
285
/*
  round-trip one codepoint through push_codepoint() and
  next_codepoint_convenience()

  push_codepoint() writes the encoded form into buf and returns its
  size. A result of -1 is tolerated only for codepoints in
  0xd800..0x10000 (inclusive), in which case the test returns true
  without going further. Otherwise four random bytes are stored after
  the encoded bytes and next_codepoint_convenience() must report the
  same codepoint and the same size.
*/
static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
291
size = push_codepoint(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint);
292
/* NOTE(review): the tolerated range extends to 0x10000, which is wider
   than the UTF-16 surrogate range 0xd800..0xdfff - confirm the bound */
torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
293
"Invalid Codepoint range");
295
if (size == -1) return true;
297
/* follow the encoded bytes with random data so the decoder has to find
   the end of the character by itself */
buf[size] = random();
298
buf[size+1] = random();
299
buf[size+2] = random();
300
buf[size+3] = random();
302
c = next_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2);
304
torture_assert(tctx, c == codepoint,
305
talloc_asprintf(tctx,
306
"next_codepoint(%u) failed - gave %u", codepoint, c));
308
torture_assert(tctx, size2 == size,
309
talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
310
codepoint, (int)size2, (int)size));
315
/*
  run test_codepoint() for every codepoint from 0 up to, but not
  including, 1<<20

  iconv_untestable() is consulted first; what happens when it or
  test_codepoint() reports a problem is not visible in this excerpt.
*/
static bool test_next_codepoint(struct torture_context *tctx)
317
unsigned int codepoint;
318
if (iconv_untestable(tctx))
321
for (codepoint=0;codepoint<(1<<20);codepoint++) {
322
if (!test_codepoint(tctx, codepoint))
328
/*
  for every codepoint from 0 up to, but not including, 1<<20: build its
  UTF-16LE form with gen_codepoint_utf16() and pass it to test_buffer()
  with "UTF-8" as the charset

  When the "progress" torture setting is true, the current codepoint is
  reported every 1000 iterations.
*/
static bool test_first_1m(struct torture_context *tctx)
330
unsigned int codepoint;
332
unsigned char inbuf[1000];
334
if (iconv_untestable(tctx))
337
for (codepoint=0;codepoint<(1<<20);codepoint++) {
338
/* a non-zero result means the codepoint could not be generated */
if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) {
342
if (codepoint % 1000 == 0) {
343
if (torture_setting_bool(tctx, "progress", true)) {
344
/* the carriage return keeps the progress output on a single line */
torture_comment(tctx, "codepoint=%u \r", codepoint);
349
if (!test_buffer(tctx, inbuf, size, "UTF-8"))
355
/*
  feed randomly generated buffers to test_buffer(), once with "UTF-8"
  and once with "CP850" as the charset

  Each buffer holds random() % 100 bytes (0..99). Inside the fill loop a
  byte is set to random() % 128 when random() % 100 < 80; the remaining
  branches of that loop are not visible in this excerpt.

  NOTE(review): the loop below runs 500000 times, whereas the function
  name and the suite label ("5M random UTF-16LE sequences") suggest five
  million - confirm which is intended.
*/
static bool test_random_5m(struct torture_context *tctx)
357
unsigned char inbuf[1000];
360
if (iconv_untestable(tctx))
363
for (i=0;i<500000;i++) {
368
if (torture_setting_bool(tctx, "progress", true)) {
369
torture_comment(tctx, "i=%u \r", i);
374
size = random() % 100;
375
for (c=0;c<size;c++) {
376
if (random() % 100 < 80) {
377
/* values below 128 only */
inbuf[c] = random() % 128;
381
if (random() % 10 == 0) {
384
if (random() % 10 == 0) {
388
if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
389
printf("i=%d failed UTF-8\n", i);
393
if (!test_buffer(tctx, inbuf, size, "CP850")) {
394
printf("i=%d failed CP850\n", i);
402
/*
  exercise convert_string_talloc() with CH_UTF16MUNGED as the source
  charset

  Three conversions are made, all allocating from a temporary talloc
  context:
   1. a random buffer of len*2 bytes (len is 1..1000) to CH_UTF8; only
      success is checked
   2. a fixed 20-byte buffer, built from in1 with SSVAL(), to CH_UTF16;
      the result must be 20 bytes long
   3. the same fixed buffer to CH_UTF8; the result must compare equal to
      "correct"

  in1 contains the unpaired surrogate values 0xd805 and 0xdcf0 and a
  zero; "correct" has the bytes \357\277\275 (the UTF-8 encoding of
  U+FFFD) in place of each surrogate and \001 in place of the zero.

  NOTE(review): if torture_fail() / torture_assert() leave the function
  early, the talloc_free(mem_ctx) at the end is not reached and mem_ctx
  stays allocated under tctx - confirm.
*/
static bool test_string2key(struct torture_context *tctx)
406
TALLOC_CTX *mem_ctx = talloc_new(tctx);
407
size_t len = (random()%1000)+1;
408
const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
414
const char *correct = "a\357\277\275b\357\277\275c\001defg";
416
buf = talloc_size(mem_ctx, len*2);
417
generate_random_buffer((uint8_t *)buf, len*2);
419
torture_comment(tctx, "converting random buffer\n");
421
if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret, false)) {
422
torture_fail(tctx, "Failed to convert random buffer\n");
426
/* store each 16-bit value of in1 into the byte buffer le1 */
SSVAL(&le1[2*i], 0, in1[i]);
429
torture_comment(tctx, "converting fixed buffer to UTF16\n");
431
if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret, false)) {
432
torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
435
torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
437
torture_comment(tctx, "converting fixed buffer to UTF8\n");
439
if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret, false)) {
440
torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
443
torture_assert(tctx, strcmp(correct, out1) == 0, "conversion gave incorrect result\n");
445
talloc_free(mem_ctx);
450
/*
  create the "ICONV" torture suite on mem_ctx and register the tests of
  this file with it

  NOTE(review): "string2key" is registered twice, as the first and as
  the last entry - confirm whether the duplicate is intended.
*/
struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
452
struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
454
torture_suite_add_simple_test(suite, "string2key",
457
torture_suite_add_simple_test(suite, "next_codepoint()",
458
test_next_codepoint);
460
torture_suite_add_simple_test(suite, "first 1M codepoints",
463
/* NOTE(review): the label says 5M, but the loop in test_random_5m()
   runs 500000 times */
torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
466
torture_suite_add_simple_test(suite, "string2key",
473
/*
  second definition of torture_local_iconv(): reports on stdout that the
  iconv test cannot run because there is no native iconv library

  Presumably this is the alternative branch of a conditional compilation
  whose directives are not visible in this excerpt - TODO confirm. The
  return value is not visible either.
*/
struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
475
printf("No native iconv library - can't run iconv test\n");