87
89
/* Size constant for the parser's parse buffer, judging by the name.
   NOTE(review): the declaration that consumes this value is not visible in
   this excerpt -- confirm whether it counts bytes or elements and whether it
   includes room for the terminating 0. */
# define JSON_PARSER_PARSE_BUFFER_SIZE 3500
92
/* Holds one UTF-16 code unit. unsigned short is guaranteed by the C standard
   to be at least 16 bits wide, but it may be wider, so code must not rely on
   wrap-around at 0xFFFF. */
typedef unsigned short UTF16;
91
94
struct JSON_parser_struct {
92
95
JSON_parser_callback callback;
94
97
signed char state, before_comment_state, type, escaped, comment, allow_comments, handle_floats_manually;
95
UTF16 utf16_decode_buffer[2];
98
UTF16 utf16_high_surrogate;
98
101
signed char* stack;
523
/* Nonzero when (uc & 0xFC00) == 0xD800, i.e. a 16-bit value in the UTF-16
   high (leading) surrogate range 0xD800..0xDBFF. Bits above bit 15 are
   ignored by the mask. The argument is evaluated exactly once. */
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
524
/* Nonzero when (uc & 0xFC00) == 0xDC00, i.e. a 16-bit value in the UTF-16
   low (trailing) surrogate range 0xDC00..0xDFFF. Bits above bit 15 are
   ignored by the mask. The argument is evaluated exactly once. */
#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
525
/* Combines a surrogate pair into a single code point: the low 10 bits of hi
   become bits 10..19, the low 10 bits of lo become bits 0..9, and 0x10000 is
   added, giving a value in 0x10000..0x10FFFF.
   The macro does not check that hi/lo really are surrogates; callers must
   test with IS_HIGH_SURROGATE / IS_LOW_SURROGATE first. Each argument is
   evaluated exactly once. */
#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
526
/* UTF-8 lead-byte marker bits, indexed by the number of trailing
   (continuation) bytes that follow the lead byte:
     0 -> 0x00 (single-byte sequence), 1 -> 0xC0, 2 -> 0xE0, 3 -> 0xF0.
   The selected value is OR-ed into the first byte of an encoded sequence.
   This is a read-only lookup table, so it is const-qualified to keep it out
   of writable data and to make accidental modification a compile-time
   diagnostic. */
static const unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
520
528
static int decode_unicode_char(JSON_parser jc)
522
const unsigned chars = jc->utf16_decode_buffer[0] ? 2 : 1;
524
UTF16 *uc = chars == 1 ? &jc->utf16_decode_buffer[0] : &jc->utf16_decode_buffer[1];
528
535
assert(jc->parse_buffer_count >= 6);
530
537
p = &jc->parse_buffer[jc->parse_buffer_count - 4];
532
for (i = 0; i < 4; ++i, ++p) {
539
for (i = 12; i >= 0; i -= 4, ++p) {
537
544
} else if (x >= 'A') {
540
x &= ~((UTF16) 0x30);
545
*uc |= x << ((3u - i) << 2);
548
/* clear UTF-16 char from buffer */
555
/* clear UTF-16 char from buffer */
549
556
jc->parse_buffer_count -= 6;
550
557
jc->parse_buffer[jc->parse_buffer_count] = 0;
552
559
/* attempt decoding ... */
554
UTF8* dec_start = (UTF8*)&jc->parse_buffer[jc->parse_buffer_count];
555
UTF8* dec_start_dup = dec_start;
556
UTF8* dec_end = dec_start + 6;
558
const UTF16* enc_start = &jc->utf16_decode_buffer[0];
559
const UTF16* enc_end = enc_start + chars;
561
const ConversionResult result = ConvertUTF16toUTF8(
562
&enc_start, enc_end, &dec_start, dec_end, strictConversion);
564
const size_t new_chars = dec_start - dec_start_dup;
566
/* was it a surrogate UTF-16 char? */
567
if (chars == 1 && result == sourceExhausted) {
560
if (jc->utf16_high_surrogate) {
561
if (IS_LOW_SURROGATE(uc)) {
562
uc = DECODE_SURROGATE_PAIR(jc->utf16_high_surrogate, uc);
564
jc->utf16_high_surrogate = 0;
566
/* high surrogate without a following low surrogate */
572
} else if (uc < 0x800) {
574
} else if (IS_HIGH_SURROGATE(uc)) {
575
/* save the high surrogate and wait for the low surrogate */
576
jc->utf16_high_surrogate = uc;
571
if (result != conversionOK) {
578
} else if (IS_LOW_SURROGATE(uc)) {
579
/* low surrogate without a preceding high surrogate */
575
/* NOTE: clear decode buffer to resume string reading,
576
otherwise we continue to read UTF-16 */
577
jc->utf16_decode_buffer[0] = 0;
579
assert(new_chars <= 6);
581
jc->parse_buffer_count += new_chars;
582
jc->parse_buffer[jc->parse_buffer_count] = 0;
586
jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]);
588
for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) {
589
jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80);
592
jc->parse_buffer[jc->parse_buffer_count] = 0;