259
259
unicode::code_point lexer::parse_codepoint() {
260
260
static char const hex_digits[] = "0123456789ABCDEF";
262
263
zstring cp_string( "\\u" ); // needed only for error message
264
unicode::code_point cp = 0;
265
for ( int i = 1; i <= 4; ++i ) {
267
if ( !get_char( &c ) || !ascii::is_xdigit( c ) )
264
unicode::code_point high_surrogate = 0;
267
unicode::code_point cp = 0;
268
for ( int i = 1; i <= 4; ++i ) {
269
if ( !get_char( &c ) )
270
throw illegal_codepoint( cur_loc_, cp_string );
272
if ( !ascii::is_xdigit( c ) )
273
throw illegal_codepoint( cur_loc_, cp_string );
274
c = ascii::to_upper( c );
275
char const *const p = std::strchr( hex_digits, c );
277
cp = (cp << 4) | (p - hex_digits);
280
if ( unicode::is_high_surrogate( cp ) ) {
281
if ( high_surrogate )
282
throw illegal_codepoint( cur_loc_, cp_string );
284
// It's easier to parse the \u for the low surrogate here rather than
285
// trying to manage state in parse_string().
287
if ( !get_char( &c ) )
288
throw illegal_codepoint( cur_loc_, cp_string );
291
throw illegal_codepoint( cur_loc_, cp_string );
292
if ( !get_char( &c ) )
293
throw illegal_codepoint( cur_loc_, cp_string );
296
throw illegal_codepoint( cur_loc_, cp_string );
301
if ( unicode::is_low_surrogate( cp ) ) {
302
if ( !high_surrogate )
303
throw illegal_codepoint( cur_loc_, cp_string );
304
return unicode::convert_surrogate( high_surrogate, cp );
306
if ( high_surrogate )
268
307
throw illegal_codepoint( cur_loc_, cp_string );
270
c = ascii::to_upper( c );
271
char const *const p = std::strchr( hex_digits, c );
273
cp = (cp << 4) | (p - hex_digits);
278
313
token::type lexer::parse_literal( char first_c, token::value_type *value ) {