254
254
location::line_type const quote_line = cur_loc_.line();
255
255
location::column_type const quote_col = cur_loc_.column();
256
parse_string( &t->value_ );
256
if ( !parse_string( &t->value_, throw_exceptions ) )
257
258
t->type_ = token::string;
259
260
cur_loc_.file(), quote_line, quote_col, prev_line_, prev_col_
274
t->numeric_type_ = parse_number( c, &t->value_ );
275
token::numeric_type nt;
276
if ( !(nt = parse_number( c, &t->value_, throw_exceptions )) )
278
t->numeric_type_ = nt;
275
279
t->type_ = token::number;
276
280
set_loc_range( &t->loc_ );
281
t->type_ = parse_literal( c, &t->value_ );
287
if ( !(tt = parse_literal( c, &t->value_, throw_exceptions )) )
282
290
set_loc_range( &t->loc_ );
291
300
t->loc_ = cur_loc_;
294
throw illegal_character( cur_loc_, c );
303
if ( throw_exceptions )
304
throw illegal_character( cur_loc_, c );
299
unicode::code_point lexer::parse_codepoint() {
310
bool lexer::parse_codepoint( unicode::code_point *result,
311
bool throw_exceptions ) {
300
312
static char const hex_digits[] = "0123456789ABCDEF";
307
319
unicode::code_point cp = 0;
308
320
for ( int i = 1; i <= 4; ++i ) {
309
321
if ( !get_char( &c ) )
310
throw illegal_codepoint( set_cur_loc_end( false ), cp_string );
322
goto error_set_cur_loc_end_false;
312
324
if ( !ascii::is_xdigit( c ) )
313
throw illegal_codepoint( set_cur_loc_end(), cp_string );
325
goto error_set_cur_loc_end;
314
326
c = ascii::to_upper( c );
315
327
char const *const p = std::strchr( hex_digits, c );
320
332
if ( unicode::is_high_surrogate( cp ) ) {
321
333
if ( high_surrogate )
322
throw illegal_codepoint( set_cur_loc_end(), cp_string );
334
goto error_set_cur_loc_end;
324
336
// It's easier to parse the \u for the low surrogate here rather than
325
337
// trying to manage state in parse_string().
327
339
if ( !get_char( &c ) )
328
throw illegal_codepoint( set_cur_loc_end( false ), cp_string );
340
goto error_set_cur_loc_end_false;
331
throw illegal_codepoint( set_cur_loc_end(), cp_string );
332
if ( !get_char( &c ) )
333
throw illegal_codepoint( set_cur_loc_end(), cp_string );
342
if ( c != '\\' || !get_char( &c ) )
343
goto error_set_cur_loc_end;
336
throw illegal_codepoint( set_cur_loc_end(), cp_string );
346
goto error_set_cur_loc_end;
338
348
high_surrogate = cp;
341
351
if ( unicode::is_low_surrogate( cp ) ) {
342
352
if ( !high_surrogate )
343
throw illegal_codepoint( set_cur_loc_end(), cp_string );
344
return unicode::convert_surrogate( high_surrogate, cp );
353
goto error_set_cur_loc_end;
354
*result = unicode::convert_surrogate( high_surrogate, cp );
346
357
if ( high_surrogate )
347
throw illegal_codepoint( set_cur_loc_end(), cp_string );
358
goto error_set_cur_loc_end;
364
error_set_cur_loc_end:
365
if ( throw_exceptions )
366
throw illegal_codepoint( set_cur_loc_end(), cp_string );
369
error_set_cur_loc_end_false:
370
if ( throw_exceptions )
371
throw illegal_codepoint( set_cur_loc_end( false ), cp_string );
353
token::type lexer::parse_literal( char first_c, token::value_type *value ) {
375
token::type lexer::parse_literal( char first_c, token::value_type *value,
376
bool throw_exceptions ) {
354
377
static token::value_type const false_value( "false" );
355
378
static token::value_type const null_value ( "null" );
356
379
static token::value_type const true_value ( "true" );
367
390
for ( char const *s = value->c_str(); *++s; ) {
368
391
if ( !get_char( &c ) )
369
throw illegal_literal( set_cur_loc_end( false ) );
392
goto error_set_cur_loc_end_false;
371
throw illegal_literal( set_cur_loc_end() );
394
goto error_set_cur_loc_end;
373
396
if ( peek_char( &c ) && (ascii::is_alnum( c ) || c == '_') )
397
goto error_set_cur_loc_end_false;
401
error_set_cur_loc_end:
402
if ( throw_exceptions )
403
throw illegal_literal( set_cur_loc_end() );
406
error_set_cur_loc_end_false:
407
if ( throw_exceptions )
374
408
throw illegal_literal( set_cur_loc_end( false ) );
379
412
token::numeric_type lexer::parse_number( char first_c,
380
token::value_type *value ) {
413
token::value_type *value,
414
bool throw_exceptions ) {
415
token::numeric_type numeric_type;
383
419
// <number> ::= [-] <int> [<frac>] [<exp>]
385
421
if ( c == '-' ) {
387
423
if ( !get_char( &c ) )
388
throw illegal_number( set_cur_loc_end( false ) );
424
goto error_set_cur_loc_end_false;
391
427
// <int> := '0' | <1-9> <digit>*
392
428
if ( !ascii::is_digit( c ) )
393
throw illegal_number( set_cur_loc_end() );
429
goto error_set_cur_loc_end;
395
token::numeric_type numeric_type = token::integer;
431
numeric_type = token::integer;
396
432
if ( c == '0' ) {
397
433
if ( !peek_char( &c ) )
435
if ( ascii::is_alnum( c ) )
436
goto error_set_cur_loc_end_false;
401
439
if ( !peek_char( &c ) )
403
441
if ( ascii::is_alpha( c ) && c != 'e' && c != 'E' )
404
throw illegal_number( set_cur_loc_end( false ) );
442
goto error_set_cur_loc_end_false;
405
443
if ( !ascii::is_digit( c ) )
416
454
if ( !get_char( &c ) )
417
throw illegal_number( set_cur_loc_end( false ) );
455
goto error_set_cur_loc_end_false;
418
456
if ( !ascii::is_digit( c ) )
419
throw illegal_number( set_cur_loc_end() );
457
goto error_set_cur_loc_end;
421
459
numeric_type = token::decimal;
423
461
if ( !peek_char( &c ) )
425
463
if ( ascii::is_alpha( c ) && c != 'e' && c != 'E' )
426
throw illegal_number( set_cur_loc_end( false ) );
464
goto error_set_cur_loc_end_false;
427
465
if ( !ascii::is_digit( c ) )
440
478
if ( !get_char( &c ) )
441
throw illegal_number( set_cur_loc_end( false ) );
479
goto error_set_cur_loc_end_false;
442
480
if ( c == '+' || c == '-' ) {
444
482
if ( !get_char( &c ) )
445
throw illegal_number( set_cur_loc_end( false ) );
483
goto error_set_cur_loc_end_false;
447
485
if ( !ascii::is_digit( c ) )
448
throw illegal_number( set_cur_loc_end() );
486
goto error_set_cur_loc_end;
450
488
numeric_type = token::floating_point;
452
490
if ( !peek_char( &c ) )
454
492
if ( ascii::is_alpha( c ) )
455
throw illegal_number( set_cur_loc_end( false ) );
493
goto error_set_cur_loc_end_false;
456
494
if ( !ascii::is_digit( c ) )
464
502
return numeric_type;
504
error_set_cur_loc_end:
505
if ( throw_exceptions )
506
throw illegal_number( set_cur_loc_end() );
507
return token::non_numeric;
509
error_set_cur_loc_end_false:
510
if ( throw_exceptions )
511
throw illegal_number( set_cur_loc_end( false ) );
512
return token::non_numeric;
467
void lexer::parse_string( token::value_type *value ) {
515
bool lexer::parse_string( token::value_type *value, bool throw_exceptions ) {
469
517
bool got_backslash = false;
470
518
location start_loc( cur_loc_ );
507
utf8::encode( parse_codepoint(), value );
558
unicode::code_point cp;
559
if ( !parse_codepoint( &cp, throw_exceptions ) )
561
utf8::encode( cp, value );
510
throw illegal_escape( set_cur_loc_end(), c );
565
if ( throw_exceptions )
566
throw illegal_escape( set_cur_loc_end(), c );