99
100
int res = mbtowc (pwc, p, m);
103
if (pwc != NULL && ((*pwc == 0) != (res == 0)))
105
if (nstate >= (res > 0 ? res : 1))
104
if (pwc != NULL && ((*pwc == 0) != (res == 0)))
106
if (nstate >= (res > 0 ? res : 1))
112
113
/* mbtowc does not distinguish between invalid and incomplete multibyte
113
sequences. But mbrtowc needs to make this distinction.
114
There are two possible approaches:
115
- Use iconv() and its return value.
116
- Use built-in knowledge about the possible encodings.
117
Given the low quality of implementation of iconv() on the systems that
118
lack mbrtowc(), we use the second approach.
119
The possible encodings are:
121
- EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,
123
Use specialized code for each. */
114
sequences. But mbrtowc needs to make this distinction.
115
There are two possible approaches:
116
- Use iconv() and its return value.
117
- Use built-in knowledge about the possible encodings.
118
Given the low quality of implementation of iconv() on the systems that
119
lack mbrtowc(), we use the second approach.
120
The possible encodings are:
122
- EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,
124
Use specialized code for each. */
124
125
if (m >= 4 || m >= MB_CUR_MAX)
126
127
/* Here MB_CUR_MAX > 1 and 0 < m < 4. */
128
const char *encoding = locale_charset ();
130
if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
132
/* Cf. unistr/u8-mblen.c. */
133
unsigned char c = (unsigned char) p[0];
148
unsigned char c2 = (unsigned char) p[1];
150
if ((c2 ^ 0x80) < 0x40
151
&& (c >= 0xe1 || c2 >= 0xa0)
152
&& (c != 0xed || c2 < 0xa0))
160
else /* m == 2 || m == 3 */
162
unsigned char c2 = (unsigned char) p[1];
164
if ((c2 ^ 0x80) < 0x40
165
&& (c >= 0xf1 || c2 >= 0x90)
166
&& (c < 0xf4 || (c == 0xf4 && c2 < 0x90)))
172
unsigned char c3 = (unsigned char) p[2];
174
if ((c3 ^ 0x80) < 0x40)
184
/* As a reference for this code, you can use the GNU libiconv
185
implementation. Look for uses of the RET_TOOFEW macro. */
187
if (STREQ (encoding, "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))
191
unsigned char c = (unsigned char) p[0];
193
if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)
198
unsigned char c = (unsigned char) p[0];
202
unsigned char c2 = (unsigned char) p[1];
204
if (c2 >= 0xa1 && c2 < 0xff)
210
if (STREQ (encoding, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
211
|| STREQ (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
212
|| STREQ (encoding, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))
216
unsigned char c = (unsigned char) p[0];
218
if (c >= 0xa1 && c < 0xff)
223
if (STREQ (encoding, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))
227
unsigned char c = (unsigned char) p[0];
229
if ((c >= 0xa1 && c < 0xff) || c == 0x8e)
232
else /* m == 2 || m == 3 */
234
unsigned char c = (unsigned char) p[0];
241
if (STREQ (encoding, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))
245
unsigned char c = (unsigned char) p[0];
247
if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))
250
else /* m == 2 || m == 3 */
252
unsigned char c = (unsigned char) p[0];
254
if (c >= 0x90 && c <= 0xe3)
256
unsigned char c2 = (unsigned char) p[1];
258
if (c2 >= 0x30 && c2 <= 0x39)
264
unsigned char c3 = (unsigned char) p[2];
266
if (c3 >= 0x81 && c3 <= 0xfe)
274
if (STREQ (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))
278
unsigned char c = (unsigned char) p[0];
280
if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)
281
|| (c >= 0xf0 && c <= 0xf9))
287
/* An unknown multibyte encoding. */
129
const char *encoding = locale_charset ();
131
if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
133
/* Cf. unistr/u8-mblen.c. */
134
unsigned char c = (unsigned char) p[0];
149
unsigned char c2 = (unsigned char) p[1];
151
if ((c2 ^ 0x80) < 0x40
152
&& (c >= 0xe1 || c2 >= 0xa0)
153
&& (c != 0xed || c2 < 0xa0))
161
else /* m == 2 || m == 3 */
163
unsigned char c2 = (unsigned char) p[1];
165
if ((c2 ^ 0x80) < 0x40
166
&& (c >= 0xf1 || c2 >= 0x90)
167
&& (c < 0xf4 || (c == 0xf4 && c2 < 0x90)))
173
unsigned char c3 = (unsigned char) p[2];
175
if ((c3 ^ 0x80) < 0x40)
185
/* As a reference for this code, you can use the GNU libiconv
186
implementation. Look for uses of the RET_TOOFEW macro. */
188
if (STREQ (encoding, "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))
192
unsigned char c = (unsigned char) p[0];
194
if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)
199
unsigned char c = (unsigned char) p[0];
203
unsigned char c2 = (unsigned char) p[1];
205
if (c2 >= 0xa1 && c2 < 0xff)
211
if (STREQ (encoding, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
212
|| STREQ (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
213
|| STREQ (encoding, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))
217
unsigned char c = (unsigned char) p[0];
219
if (c >= 0xa1 && c < 0xff)
224
if (STREQ (encoding, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))
228
unsigned char c = (unsigned char) p[0];
230
if ((c >= 0xa1 && c < 0xff) || c == 0x8e)
233
else /* m == 2 || m == 3 */
235
unsigned char c = (unsigned char) p[0];
242
if (STREQ (encoding, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))
246
unsigned char c = (unsigned char) p[0];
248
if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))
251
else /* m == 2 || m == 3 */
253
unsigned char c = (unsigned char) p[0];
255
if (c >= 0x90 && c <= 0xe3)
257
unsigned char c2 = (unsigned char) p[1];
259
if (c2 >= 0x30 && c2 <= 0x39)
265
unsigned char c3 = (unsigned char) p[2];
267
if (c3 >= 0x81 && c3 <= 0xfe)
275
if (STREQ (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))
279
unsigned char c = (unsigned char) p[0];
281
if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)
282
|| (c >= 0xf0 && c <= 0xf9))
288
/* An unknown multibyte encoding. */
294
/* Here 0 <= k < m < 4. */
295
/* Here 0 <= k < m < 4. */
306
307
return (size_t)(-2);