/* Test of u8_check() function.
   Copyright (C) 2010 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */

/* Written by Bruno Haible <bruno@clisp.org>, 2010. */
28
/* Test empty string. */
30
static const uint8_t input[] = "";
31
ASSERT (u8_check (input, 0) == NULL);
34
/* Test valid non-empty string. */
36
static const uint8_t input[] = /* "Данило Шеган" */
37
"\320\224\320\260\320\275\320\270\320\273\320\276 \320\250\320\265\320\263\320\260\320\275";
38
ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
41
/* Test out-of-range character with 4 bytes: U+110000. */
43
static const uint8_t input[] = "\320\224\320\260\364\220\200\200";
44
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
47
/* Test out-of-range character with 5 bytes: U+200000. */
49
static const uint8_t input[] = "\320\224\320\260\370\210\200\200\200";
50
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
53
/* Test out-of-range character with 6 bytes: U+4000000. */
55
static const uint8_t input[] = "\320\224\320\260\374\204\200\200\200\200";
56
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
59
/* Test invalid lead byte. */
61
static const uint8_t input[] = "\320\224\320\260\376\200\200\200\200\200";
62
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
65
static const uint8_t input[] = "\320\224\320\260\377\200\200\200\200\200";
66
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
69
/* Test overlong 2-byte character. */
71
static const uint8_t input[] = "\320\224\320\260\301\200";
72
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
75
/* Test overlong 3-byte character. */
77
static const uint8_t input[] = "\320\224\320\260\340\200\277";
78
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
81
/* Test overlong 4-byte character. */
83
static const uint8_t input[] = "\320\224\320\260\360\200\277\277";
84
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
87
/* Test invalid bytes in 2-byte character. */
89
static const uint8_t input[] = "\320\224\320\260\302\200";
90
ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
93
static const uint8_t input[] = "\320\224\320\260\302\100";
94
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
97
static const uint8_t input[] = "\320\224\320\260\302\300";
98
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
101
/* Test invalid bytes in 3-byte character. */
103
static const uint8_t input[] = "\320\224\320\260\342\200\200";
104
ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
107
static const uint8_t input[] = "\320\224\320\260\342\100\200";
108
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
111
static const uint8_t input[] = "\320\224\320\260\342\300\200";
112
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
115
static const uint8_t input[] = "\320\224\320\260\342\200\100";
116
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
119
static const uint8_t input[] = "\320\224\320\260\342\200\300";
120
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
123
/* Test invalid bytes in 4-byte character. */
125
static const uint8_t input[] = "\320\224\320\260\362\200\200\200";
126
ASSERT (u8_check (input, sizeof (input) - 1) == NULL);
129
static const uint8_t input[] = "\320\224\320\260\362\100\200\200";
130
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
133
static const uint8_t input[] = "\320\224\320\260\362\300\200\200";
134
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
137
static const uint8_t input[] = "\320\224\320\260\362\200\100\200";
138
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
141
static const uint8_t input[] = "\320\224\320\260\362\200\300\200";
142
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
145
static const uint8_t input[] = "\320\224\320\260\362\200\200\100";
146
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
149
static const uint8_t input[] = "\320\224\320\260\362\200\200\300";
150
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
153
/* Test truncated/incomplete 2-byte character. */
155
static const uint8_t input[] = "\320\224\320\260\302";
156
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
159
/* Test truncated/incomplete 3-byte character. */
161
static const uint8_t input[] = "\320\224\320\260\342\200";
162
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
165
/* Test truncated/incomplete 4-byte character. */
167
static const uint8_t input[] = "\320\224\320\260\362\200\200";
168
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
171
/* Test missing lead byte. */
173
static const uint8_t input[] = "\320\224\320\260\200\200\200\200\200";
174
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
177
/* Test surrogate codepoints. */
179
static const uint8_t input[] = "\320\224\320\260\355\240\200\355\260\200";
180
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);
183
static const uint8_t input[] = "\320\224\320\260\355\260\200";
184
ASSERT (u8_check (input, sizeof (input) - 1) == input + 4);