1
/* Test the Unicode character name functions.
2
Copyright (C) 2000-2003 Free Software Foundation, Inc.
4
This program is free software; you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation; either version 2, or (at your option) any later version.
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software Foundation,
16
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
30
/* The names according to the UnicodeData.txt file, modified to contain the
31
Hangul syllable names, as described in the Unicode 3.0 book. */
32
/* Name table indexed by Unicode code point (0 .. 0x10FFFF).  An entry is
   NULL when no name has been loaded for that code point.  */
const char *unicode_names[0x110000];
34
/* Maximum length of a field in the UnicodeData.txt file. */
37
/* Reads the next field from STREAM. The buffer BUFFER has size FIELDLEN.
38
Reads up to (but excluding) DELIM.
39
Returns 1 when a field was successfully read, otherwise 0. */
41
getfield (FILE *stream, char *buffer, int delim)
46
for (; (c = getc (stream)), (c != EOF && c != delim); )
48
/* Put c into the buffer. */
49
if (++count >= FIELDLEN - 1)
51
fprintf (stderr, "field too long\n");
64
/* Stores in unicode_names[] the relevant contents of the UnicodeData.txt file. */
67
fill_names (const char *unicodedata_filename)
71
char field0[FIELDLEN];
72
char field1[FIELDLEN];
75
for (i = 0; i < 0x110000; i++)
76
unicode_names[i] = NULL;
78
stream = fopen (unicodedata_filename, "r");
81
fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
91
n = getfield (stream, field0, ';');
92
n += getfield (stream, field1, ';');
97
fprintf (stderr, "short line in '%s':%d\n",
98
unicodedata_filename, lineno);
101
for (; (c = getc (stream)), (c != EOF && c != '\n'); )
103
i = strtoul (field0, NULL, 16);
106
fprintf (stderr, "index too large\n");
109
unicode_names[i] = xstrdup (field1);
111
if (ferror (stream) || fclose (stream))
113
fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
118
/* Perform an exhaustive test of the unicode_character_name function. */
124
char buf[UNINAME_MAX];
126
/* Walk every code point up to 0x10FFFF so that, together with the
   out-of-range loop starting at 0x110000, the test is really exhaustive.  */
for (i = 0; i < 0x110000; i++)
128
char *result = unicode_character_name (i, buf);
130
if (unicode_names[i] != NULL)
134
fprintf (stderr, "\\u%04X name lookup failed!\n", i);
137
else if (strcmp (result, unicode_names[i]) != 0)
139
fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
148
fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
155
for (i = 0x110000; i < 0x1000000; i++)
157
char *result = unicode_character_name (i, buf);
161
fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
170
/* Perform a test of the unicode_name_character function. */
172
test_inverse_lookup ()
177
/* First, verify all valid character names are recognized. */
178
for (i = 0; i < 0x110000; i++)
179
if (unicode_names[i] != NULL)
181
unsigned int result = unicode_name_character (unicode_names[i]);
184
if (result == UNINAME_INVALID)
185
fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
189
"inverse name lookup of \"%s\" returned 0x%04X\n",
190
unicode_names[i], result);
195
/* Second, generate random but likely names and verify they are not
196
recognized unless really valid. */
197
for (i = 0; i < 10000; i++)
202
unsigned int l1, l2, j1, j2;
203
char buf[2*UNINAME_MAX];
206
do i1 = ((rand () % 0x11) << 16)
207
+ ((rand () & 0xff) << 8)
209
while (unicode_names[i1] == NULL);
211
do i2 = ((rand () % 0x11) << 16)
212
+ ((rand () & 0xff) << 8)
214
while (unicode_names[i2] == NULL);
216
s1 = unicode_names[i1];
218
s2 = unicode_names[i2];
221
/* Concatenate a starting piece of s1 with an ending piece of s2. */
222
for (j1 = 1; j1 <= l1; j1++)
223
if (j1 == l1 || s1[j1] == ' ')
224
for (j2 = 0; j2 < l2; j2++)
225
if (j2 == 0 || s2[j2-1] == ' ')
227
memcpy (buf, s1, j1);
229
memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
231
result = unicode_name_character (buf);
232
if (result != UNINAME_INVALID
233
&& !(unicode_names[result] != NULL
234
&& strcmp (unicode_names[result], buf) == 0))
237
"inverse name lookup of \"%s\" returned 0x%04X\n",
238
/* Report the synthesized name that was actually looked up; i is merely
   the iteration counter here and unicode_names[i] may even be NULL.  */
buf, result);
244
/* Third, some extreme case that used to loop. */
245
if (unicode_name_character ("A A") != UNINAME_INVALID)
252
main (int argc, char *argv[])
256
fill_names (argv[1]);
258
error |= test_name_lookup ();
259
error |= test_inverse_lookup ();