60
53
#include "widthspec.h"
56
/* Returns -2 if not enough space, -1 on invalid character. */
64
grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
65
const grub_uint8_t *src, grub_size_t srcsize,
66
const grub_uint8_t **srcend)
58
grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
68
grub_uint16_t *p = dest;
70
grub_uint32_t code = 0;
75
while (srcsize && destsize)
77
grub_uint32_t c = *src++;
78
if (srcsize != (grub_size_t)-1)
82
if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
90
code |= (c & GRUB_UINT8_6_TRAILINGBITS);
99
if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
101
else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
104
code = c & GRUB_UINT8_5_TRAILINGBITS;
106
else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
109
code = c & GRUB_UINT8_4_TRAILINGBITS;
111
else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
114
code = c & GRUB_UINT8_3_TRAILINGBITS;
122
if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
124
if (code >= GRUB_UCS2_LIMIT)
126
*p++ = GRUB_UTF16_UPPER_SURROGATE (code);
127
*p++ = GRUB_UTF16_LOWER_SURROGATE (code);
70
if (dest + 1 >= destend)
72
*dest++ = (code >> 6) | 0xC0;
73
*dest++ = (code & 0x3F) | 0x80;
76
if ((code >= 0xDC00 && code <= 0xDFFF)
77
|| (code >= 0xD800 && code <= 0xDBFF))
79
/* No surrogates in UCS-4... */
84
if (dest + 2 >= destend)
86
*dest++ = (code >> 12) | 0xE0;
87
*dest++ = ((code >> 6) & 0x3F) | 0x80;
88
*dest++ = (code & 0x3F) | 0x80;
92
if (dest + 3 >= destend)
94
*dest++ = (code >> 18) | 0xF0;
95
*dest++ = ((code >> 12) & 0x3F) | 0x80;
96
*dest++ = ((code >> 6) & 0x3F) | 0x80;
97
*dest++ = (code & 0x3F) | 0x80;
143
103
/* Convert UCS-4 to UTF-8. */
145
grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
105
grub_ucs4_to_utf8 (const grub_uint32_t *src, grub_size_t size,
146
106
grub_uint8_t *dest, grub_size_t destsize)
148
108
/* Keep last char for \0. */
149
109
grub_uint8_t *destend = dest + destsize - 1;
110
grub_uint8_t *dest0 = dest;
151
112
while (size-- && dest < destend)
153
114
grub_uint32_t code = *src++;
157
else if (code <= 0x07FF)
159
if (dest + 1 >= destend)
161
*dest++ = (code >> 6) | 0xC0;
162
*dest++ = (code & 0x3F) | 0x80;
164
else if ((code >= 0xDC00 && code <= 0xDFFF)
165
|| (code >= 0xD800 && code <= 0xDBFF))
167
/* No surrogates in UCS-4... */
116
s = grub_encode_utf8_character (dest, destend, code);
170
else if (code < 0x10000)
172
if (dest + 2 >= destend)
174
*dest++ = (code >> 12) | 0xE0;
175
*dest++ = ((code >> 6) & 0x3F) | 0x80;
176
*dest++ = (code & 0x3F) | 0x80;
180
if (dest + 3 >= destend)
182
*dest++ = (code >> 18) | 0xF0;
183
*dest++ = ((code >> 12) & 0x3F) | 0x80;
184
*dest++ = ((code >> 6) & 0x3F) | 0x80;
185
*dest++ = (code & 0x3F) | 0x80;
191
/* Convert UCS-4 to UTF-8. */
193
grub_ucs4_to_utf8_alloc (grub_uint32_t *src, grub_size_t size)
130
/* Returns the number of bytes the string src would occupy if converted
131
to UTF-8, excluding trailing \0. */
133
grub_get_num_of_utf8_bytes (const grub_uint32_t *src, grub_size_t size)
195
135
grub_size_t remaining;
136
const grub_uint32_t *ptr;
197
137
grub_size_t cnt = 0;
200
139
remaining = size;
231
178
grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize)
233
181
grub_uint32_t code = 0;
238
grub_uint32_t c = *src++;
239
185
if (srcsize != (grub_size_t)-1)
243
if ((c & 0xc0) != 0x80)
260
if ((c & 0x80) == 0x00)
262
else if ((c & 0xe0) == 0xc0)
267
else if ((c & 0xf0) == 0xe0)
272
else if ((c & 0xf8) == 0xf0)
187
if (!grub_utf8_process (*src++, &code, &count))
193
if (code > GRUB_UNICODE_LAST_VALID)
286
201
grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
287
grub_uint32_t **last_position)
202
grub_uint32_t **last_position)
289
204
grub_size_t msg_len = grub_strlen (msg);
323
238
while (srcsize && destsize)
325
grub_uint32_t c = *src++;
240
int was_count = count;
326
241
if (srcsize != (grub_size_t)-1)
330
if ((c & 0xc0) != 0x80)
334
/* Character c may be valid, don't eat it. */
336
if (srcsize != (grub_size_t)-1)
352
if ((c & 0x80) == 0x00)
354
else if ((c & 0xe0) == 0xc0)
359
else if ((c & 0xf0) == 0xe0)
364
else if ((c & 0xf8) == 0xf0)
243
if (!grub_utf8_process (*src++, &code, &count))
247
/* Character c may be valid, don't eat it. */
1167
const grub_uint32_t *
1168
grub_unicode_get_comb_start (const grub_uint32_t *str,
1169
const grub_uint32_t *cur)
1171
const grub_uint32_t *ptr;
1172
for (ptr = cur; ptr >= str; ptr--)
1174
if (*ptr >= GRUB_UNICODE_VARIATION_SELECTOR_1
1175
&& *ptr <= GRUB_UNICODE_VARIATION_SELECTOR_16)
1178
if (*ptr >= GRUB_UNICODE_VARIATION_SELECTOR_17
1179
&& *ptr <= GRUB_UNICODE_VARIATION_SELECTOR_256)
1182
enum grub_comb_type comb_type;
1183
comb_type = grub_unicode_get_comb_type (*ptr);