1
/* Localization of proper names.
2
Copyright (C) 2006-2010 Free Software Foundation, Inc.
3
Written by Bruno Haible <bruno@clisp.org>, 2006.
5
This program is free software: you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation; either version 3 of the License, or
8
(at your option) any later version.
10
This program is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
#include "propername.h"
35
#include "localcharset.h"
36
#include "c-strcase.h"
37
#include "xstriconv.h"
42
/* Tests whether STRING contains trim (SUB), starting and ending at word boundaries.
44
Here, instead of implementing Unicode Standard Annex #29 for determining
45
word boundaries, we assume that trim (SUB) starts and ends with words and
46
only test whether the part before it ends with a non-word and the part
47
after it starts with a non-word. */
49
/* Searches STRING for occurrences of the trimmed form of SUB and, for each
   occurrence, checks that the character before it and the character after
   it are not alphanumeric.
   NOTE(review): this excerpt lacks the return type, the braces and the
   return statements, so what is returned in each case cannot be confirmed
   from here.  Two variants of the scan are visible: one that walks
   multibyte characters with mbui iterators, and one that inspects single
   bytes with isalnum.  The condition selecting between them is not visible
   in this excerpt.  */
mbsstr_trimmed_wordbounded (const char *string, const char *sub)
51
/* TSUB is the result of trim (SUB); every search below uses it in place of
   SUB.  */
char *tsub = trim (sub);
54
/* Scan until the end of STRING.  STRING is moved forward after each
   rejected candidate (see the assignments to STRING further down).  */
for (; *string != '\0';)
56
/* Next candidate occurrence of TSUB at or after STRING.  */
const char *tsub_in_string = mbsstr (string, tsub);
57
/* No occurrence left.  NOTE(review): the statement executed in this case is
   not visible in this excerpt.  */
if (tsub_in_string == NULL)
63
/* --- Variant that walks multibyte characters.  --- */
mbui_iterator_t string_iter;
64
bool word_boundary_before;
65
bool word_boundary_after;
67
/* Walk from STRING up to the match in order to find the character that
   immediately precedes it.  */
mbui_init (string_iter, string);
68
/* A match at the very start of STRING counts as preceded by a boundary.  */
word_boundary_before = true;
69
if (mbui_cur_ptr (string_iter) < tsub_in_string)
71
mbchar_t last_char_before_tsub;
74
/* NOTE(review): the iterator running out before reaching the match is
   checked here; the reaction to it is not visible in this excerpt.  */
if (!mbui_avail (string_iter))
76
last_char_before_tsub = mbui_cur (string_iter);
77
mbui_advance (string_iter);
79
/* Loop condition: keep stepping while the iterator is still before the
   match.  NOTE(review): the loop keyword that this condition closes is not
   visible in this excerpt.  */
while (mbui_cur_ptr (string_iter) < tsub_in_string);
80
/* An alphanumeric character right before the match means the match starts
   in the middle of a word.  */
if (mb_isalnum (last_char_before_tsub))
81
word_boundary_before = false;
84
/* Restart the iterator at the match itself.  */
mbui_init (string_iter, tsub_in_string);
86
mbui_iterator_t tsub_iter;
88
/* Advance STRING_ITER by one character for every character of TSUB.  */
for (mbui_init (tsub_iter, tsub);
89
mbui_avail (tsub_iter);
90
mbui_advance (tsub_iter))
92
/* NOTE(review): STRING ending inside the match is checked here; the
   reaction to it is not visible in this excerpt.  */
if (!mbui_avail (string_iter))
94
mbui_advance (string_iter);
97
/* A match that reaches the end of STRING counts as followed by a
   boundary.  */
word_boundary_after = true;
98
if (mbui_avail (string_iter))
100
mbchar_t first_char_after_tsub = mbui_cur (string_iter);
101
if (mb_isalnum (first_char_after_tsub))
102
word_boundary_after = false;
105
/* Both sides are delimited.  NOTE(review): the statements executed on
   success are not visible in this excerpt.  */
if (word_boundary_before && word_boundary_after)
111
/* Candidate rejected: resume the search one multibyte character after the
   start of this occurrence.  */
mbui_init (string_iter, tsub_in_string);
112
if (!mbui_avail (string_iter))
114
string = tsub_in_string + mb_len (mbui_cur (string_iter));
118
/* --- Variant that inspects single bytes.  --- */
bool word_boundary_before;
120
bool word_boundary_after;
122
word_boundary_before = true;
123
/* Only look at the preceding byte when the match is not at the very start
   of STRING; otherwise tsub_in_string[-1] would be outside the scanned
   range.  */
if (string < tsub_in_string)
124
if (isalnum ((unsigned char) tsub_in_string[-1]))
125
word_boundary_before = false;
127
/* P addresses the byte immediately after the occurrence.
   NOTE(review): the declaration of P is not visible in this excerpt.  */
p = tsub_in_string + strlen (tsub);
128
word_boundary_after = true;
130
if (isalnum ((unsigned char) *p))
131
word_boundary_after = false;
133
/* Both sides are delimited.  NOTE(review): the statements executed on
   success are not visible in this excerpt.  */
if (word_boundary_before && word_boundary_after)
139
/* The occurrence starts at the terminating NUL.  NOTE(review): the
   statement executed in this case is not visible in this excerpt.  */
if (*tsub_in_string == '\0')
141
/* Candidate rejected: resume the search one byte after the start of this
   occurrence.  */
string = tsub_in_string + 1;
149
/* Return the localization of NAME. NAME is written in ASCII. */
152
/* Looks up NAME with gettext.  If a translation exists and does not already
   contain NAME at word boundaries, formats "TRANSLATION (NAME)" into a
   newly allocated buffer.
   NOTE(review): this excerpt lacks the return type, the braces, the
   declaration of RESULT and all return statements, so the value returned
   on each path cannot be confirmed from here.  */
proper_name (const char *name)
154
/* See whether there is a translation. */
155
const char *translation = gettext (name);
157
/* This is a pointer comparison, not a string comparison: it is true when
   gettext handed back something other than the NAME argument itself.  */
if (translation != name)
159
/* See whether the translation contains the original name. */
160
if (mbsstr_trimmed_wordbounded (translation, name))
164
/* Return "TRANSLATION (NAME)". */
166
/* Size breakdown, matching the format string used below:
   translation + 2 bytes for " (" + name + 1 byte for ")" + 1 byte for the
   terminating NUL.  */
XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
168
sprintf (result, "%s (%s)", translation, name);
176
/* Return the localization of a name whose original writing is not ASCII.
177
NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
178
escape sequences. NAME_ASCII is a fallback written only with ASCII characters. */
182
proper_name_utf8 (const char *name_ascii, const char *name_utf8)
184
/* Overview: looks up NAME_ASCII with gettext, tries to convert NAME_UTF8 to
   the locale's character set (exactly and, where supported, with
   transliteration), and then either uses the translation, formats
   "TRANSLATION (NAME)", or falls back to the converted name.
   NOTE(review): this excerpt lacks the return type, the braces, several
   declarations and all return statements, so the value returned on each
   path cannot be confirmed from here.  */
/* See whether there is a translation. */
185
const char *translation = gettext (name_ascii);
187
/* Try to convert NAME_UTF8 to the locale encoding. */
188
const char *locale_code = locale_charset ();
189
/* The alloc_* pointers record buffers that this function must free; the
   name_* pointers are the values actually used, and may instead point at
   NAME_UTF8 (see the assignments further down).  */
char *alloc_name_converted = NULL;
190
char *alloc_name_converted_translit = NULL;
191
const char *name_converted = NULL;
192
const char *name_converted_translit = NULL;
195
/* Conversion is attempted only when the locale character set is not UTF-8;
   the comparison ignores case.  */
if (c_strcasecmp (locale_code, "UTF-8") != 0)
198
/* Exact conversion; the result is recorded both for use and for freeing.  */
name_converted = alloc_name_converted =
199
xstr_iconv (name_utf8, "UTF-8", locale_code);
201
# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \
202
|| _LIBICONV_VERSION >= 0x0105
204
char *converted_translit;
206
/* Build the target name LOCALE_CODE followed by the 10-byte suffix
   "//TRANSLIT".  The buffer has room for both plus the terminating NUL, and
   the second memcpy copies 10 + 1 bytes so that the NUL of the literal is
   copied too.  */
size_t len = strlen (locale_code);
207
char *locale_code_translit = XNMALLOC (len + 10 + 1, char);
208
memcpy (locale_code_translit, locale_code, len);
209
memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1);
212
/* NOTE(review): the variable receiving this result is not visible in this
   excerpt; presumably it is CONVERTED_TRANSLIT, which is tested below.  */
xstr_iconv (name_utf8, "UTF-8", locale_code_translit);
214
/* The suffixed character set name was only needed for the call above.  */
free (locale_code_translit);
216
if (converted_translit != NULL)
218
# if !_LIBICONV_VERSION
219
/* Don't use the transliteration if it added question marks.
220
glibc's transliteration falls back to question marks; libiconv's
221
transliteration does not.
222
mbschr is equivalent to strchr in this case. */
223
if (strchr (converted_translit, '?') != NULL)
224
free (converted_translit);
227
/* NOTE(review): the right-hand side of this assignment is not visible in
   this excerpt.  */
name_converted_translit = alloc_name_converted_translit =
236
/* Here NAME_UTF8 is used directly, with no allocation, which is why the
   alloc_* pointers stay NULL on this path.
   NOTE(review): presumably this is the branch for a UTF-8 locale; the
   enclosing branch keyword is not visible in this excerpt.  */
name_converted = name_utf8;
237
name_converted_translit = name_utf8;
240
/* The name in locale encoding. */
241
/* Preference order: the exact conversion first, then the transliteration.
   NOTE(review): the final fallback operand and the declaration of NAME are
   not visible in this excerpt.  */
name = (name_converted != NULL ? name_converted :
242
name_converted_translit != NULL ? name_converted_translit :
245
/* See whether we have a translation. Some translators have not understood
246
that they should use the UTF-8 form of the name, if possible. So if the
247
translator provided a no-op translation, we ignore it. */
248
/* This compares string contents rather than pointers, so a translation
   whose text equals NAME_ASCII is treated like a missing translation.  */
if (strcmp (translation, name_ascii) != 0)
250
/* See whether the translation contains the original name. */
251
if (mbsstr_trimmed_wordbounded (translation, name_ascii)
252
|| (name_converted != NULL
253
&& mbsstr_trimmed_wordbounded (translation, name_converted))
254
|| (name_converted_translit != NULL
255
&& mbsstr_trimmed_wordbounded (translation, name_converted_translit)))
257
/* The translation already contains the name in some form, so both
   conversion buffers are released.  NOTE(review): the statement that
   follows these frees is not visible in this excerpt.  */
if (alloc_name_converted != NULL)
258
free (alloc_name_converted);
259
if (alloc_name_converted_translit != NULL)
260
free (alloc_name_converted_translit);
265
/* Return "TRANSLATION (NAME)". */
267
/* Size breakdown, matching the format string used below:
   translation + 2 bytes for " (" + name + 1 byte for ")" + 1 byte for the
   terminating NUL.  */
XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
269
sprintf (result, "%s (%s)", translation, name);
271
/* sprintf has copied NAME into RESULT, so the conversion buffers are no
   longer referenced and can be released.  */
if (alloc_name_converted != NULL)
272
free (alloc_name_converted);
273
if (alloc_name_converted_translit != NULL)
274
free (alloc_name_converted_translit);
280
/* Release each conversion buffer unless NAME points to it.
   NOTE(review): presumably NAME is what gets returned on this path; the
   return statement is not visible in this excerpt.  */
if (alloc_name_converted != NULL && alloc_name_converted != name)
281
free (alloc_name_converted);
282
if (alloc_name_converted_translit != NULL
283
&& alloc_name_converted_translit != name)
284
free (alloc_name_converted_translit);
292
/* Test driver for mbsstr_trimmed_wordbounded: passes the first two
   command-line arguments to it as STRING and SUB.
   NOTE(review): this excerpt lacks the return type, the braces and the
   statements that act on the result.  Another definition of main appears
   later in this file, so presumably each one is guarded by a preprocessor
   conditional that is not visible here.  */
main (int argc, char *argv[])
294
/* An empty locale name selects the locale from the environment.  */
setlocale (LC_ALL, "");
295
/* NOTE(review): no check of ARGC is visible before argv[1] and argv[2] are
   used; confirm whether one exists in the complete file.  */
if (mbsstr_trimmed_wordbounded (argv[1], argv[2]))
305
/* Test driver for proper_name_utf8: prints the localized form of one fixed
   name followed by a newline.
   NOTE(review): this excerpt lacks the return type, the braces and the
   return statement.  Another definition of main appears earlier in this
   file, so presumably each one is guarded by a preprocessor conditional
   that is not visible here.  */
main (int argc, char *argv[])
307
/* An empty locale name selects the locale from the environment.  */
setlocale (LC_ALL, "");
308
/* The second argument spells the c-cedilla as the octal escapes \303\247,
   i.e. the bytes 0xC3 0xA7, which is the UTF-8 encoding of U+00E7.  The
   first argument is the ASCII-only fallback spelling.  */
printf ("%s\n", proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard"));