4
* Copyright (C) 2008 10gen Inc.
6
* This program is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Affero General Public License, version 3,
8
* as published by the Free Software Foundation.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU Affero General Public License for more details.
15
* You should have received a copy of the GNU Affero General Public License
16
* along with this program. If not, see <http://www.gnu.org/licenses/>.
21
#include "../util/builder.h"
22
#include "../util/base64.h"
24
using namespace boost::spirit;
28
// Parser-side state used while JSON text is converted to BSON.  It keeps a
// stack of BSONObjBuilders (pushObject/pushArray add a level) together with
// parallel stacks of pending field names and array indexes, plus scratch
// members that parser actions fill in before a value is appended.
struct ObjectBuilder {
29
// Returns the builder on top of the stack as a raw, non-owning pointer.
BSONObjBuilder *back() {
30
return builders.back().get();
32
// Storage for field names of elements within builders.back().
33
// Returns the pending field name for the top builder; the pointer refers to
// storage held by fieldNames.back().
const char *fieldName() {
34
return fieldNames.back().c_str();
37
// True when no builder has been pushed.
return builders.size() == 0;
40
// Create a standalone builder and push it with an empty field name and index 0.
boost::shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
41
builders.push_back( b );
42
fieldNames.push_back( "" );
43
indexes.push_back( 0 );
45
// Opens a nested object named fieldName inside the current top builder.  The
// new builder is constructed from builders.back()->subobjStart( fieldName ) and
// pushed with an empty pending field name and index 0.
void pushObject( const char *fieldName ) {
46
boost::shared_ptr< BSONObjBuilder > b( new BSONObjBuilder( builders.back()->subobjStart( fieldName ) ) );
47
builders.push_back( b );
48
fieldNames.push_back( "" );
49
indexes.push_back( 0 );
51
// Same as pushObject, except the nested builder is constructed from
// builders.back()->subarrayStart( fieldName ).
void pushArray( const char *fieldName ) {
52
boost::shared_ptr< BSONObjBuilder > b( new BSONObjBuilder( builders.back()->subarrayStart( fieldName ) ) );
53
builders.push_back( b );
54
fieldNames.push_back( "" );
55
indexes.push_back( 0 );
59
if ( back()->owned() )
64
fieldNames.pop_back();
68
// Replaces the pending field name with BSONObjBuilder::numStr of the current
// index, so the next element is keyed by its position.
void nameFromIndex() {
69
fieldNames.back() = BSONObjBuilder::numStr( indexes.back() );
72
// Copy out the characters accumulated in ss.
string ret = ss.str();
76
// Cannot use auto_ptr because its copy constructor takes a non const reference.
77
// The three vectors below are pushed together, one entry per nesting level.
vector< boost::shared_ptr< BSONObjBuilder > > builders;
78
vector< string > fieldNames;
79
vector< int > indexes;
84
// Subtype of the bindata value being parsed; written by the binDataType action
// and read by binDataEnd.
BinDataType binDataType;
91
// Parser action attached to '{' in the object rule: opens a nested object
// builder under the pending field name.
objectStart( ObjectBuilder &_b ) : b( _b ) {}
92
void operator() ( const char &c ) const {
96
b.pushObject( b.fieldName() );
102
// Parser action attached to '[' in the array rule: opens a nested array
// builder under the pending field name.
arrayStart( ObjectBuilder &_b ) : b( _b ) {}
103
void operator() ( const char &c ) const {
104
b.pushArray( b.fieldName() );
111
arrayNext( ObjectBuilder &_b ) : b( _b ) {}
112
void operator() ( const char &c ) const {
120
ch( ObjectBuilder &_b ) : b( _b ) {}
121
void operator() ( const char c ) const {
128
chE( ObjectBuilder &_b ) : b( _b ) {}
129
void operator() ( const char c ) const {
172
// One test per accepted hex digit range: '0'-'9', 'a'-'f' and 'A'-'F'.
if ( '0' <= c && c <= '9' )
174
if ( 'a' <= c && c <= 'f' )
176
if ( 'A' <= c && c <= 'F' )
181
// Decodes two consecutive hex digit characters into one byte: c[ 0 ] supplies
// the high nibble and c[ 1 ] the low nibble.  Reads exactly two characters.
char val( const char *c ) {
182
return ( val( c[ 0 ] ) << 4 ) | val( c[ 1 ] );
187
// Parser action for the four hex digits that follow 'u' in a string escape:
// writes the UTF-8 encoding of that 16-bit value to b.ss.
chU( ObjectBuilder &_b ) : b( _b ) {}
188
void operator() ( const char *start, const char *end ) const {
189
// High byte (first two hex digits) and low byte (last two hex digits).
unsigned char first = hex::val( start );
190
unsigned char second = hex::val( start + 2 );
191
// Values below 0x80.
if ( first == 0 && second < 0x80 )
193
// Values below 0x800: two bytes, 110xxxxx 10xxxxxx.
else if ( first < 0x08 ) {
194
b.ss << char( 0xc0 | ( ( first << 2 ) | ( second >> 6 ) ) );
195
b.ss << char( 0x80 | ( ~0xc0 & second ) );
197
// Three bytes, 1110xxxx 10xxxxxx 10xxxxxx.
b.ss << char( 0xe0 | ( first >> 4 ) );
198
b.ss << char( 0x80 | ( ~0xc0 & ( ( first << 2 ) | ( second >> 6 ) ) ) );
199
b.ss << char( 0x80 | ( ~0xc0 & second ) );
206
chClear( ObjectBuilder &_b ) : b( _b ) {}
207
void operator() ( const char c ) const {
213
// Parser action run when a quoted field name (str or singleQuoteStr) has
// matched: takes the accumulated string from the builder, rejects reserved
// names, and stores the result as the pending field name of the top builder.
struct fieldNameEnd {
214
fieldNameEnd( ObjectBuilder &_b ) : b( _b ) {}
215
void operator() ( const char *start, const char *end ) const {
216
// The matched range is not used; the unescaped text comes from popString().
string name = b.popString();
217
// Assertion 10338 guards against reserved names such as "$options".
massert( 10338 , "Invalid use of reserved field name",
223
name != "$options" );
224
b.fieldNames.back() = name;
229
// Parser action run when an unquoted field name has matched: the matched
// character range [start, end) is used verbatim as the pending field name.
struct unquotedFieldNameEnd {
230
unquotedFieldNameEnd( ObjectBuilder &_b ) : b( _b ) {}
231
void operator() ( const char *start, const char *end ) const {
232
string name( start, end );
233
b.fieldNames.back() = name;
239
// Parser action run when a string value has matched: appends the accumulated
// string (b.popString()) to the top builder under the pending field name.
stringEnd( ObjectBuilder &_b ) : b( _b ) {}
240
void operator() ( const char *start, const char *end ) const {
241
b.back()->append( b.fieldName(), b.popString() );
247
// Parser action for the number rule: appends the parsed double to the top
// builder under the pending field name.
numberValue( ObjectBuilder &_b ) : b( _b ) {}
248
void operator() ( double d ) const {
249
b.back()->append( b.fieldName(), d );
255
// Parser action for the integer rule: appends the value as an int when it
// fits in the range of int, otherwise as a long long.
intValue( ObjectBuilder &_b ) : b( _b ) {}
256
void operator() ( long long num ) const {
257
// Range check against the limits of int before narrowing.
if (num >= numeric_limits<int>::min() && num <= numeric_limits<int>::max())
258
b.back()->append( b.fieldName(), (int)num );
260
b.back()->append( b.fieldName(), num );
265
struct subobjectEnd {
266
subobjectEnd( ObjectBuilder &_b ) : b( _b ) {}
267
void operator() ( const char *start, const char *end ) const {
274
arrayEnd( ObjectBuilder &_b ) : b( _b ) {}
275
void operator() ( const char *start, const char *end ) const {
282
// Parser action for the literal "true": appends a boolean true under the
// pending field name.
trueValue( ObjectBuilder &_b ) : b( _b ) {}
283
void operator() ( const char *start, const char *end ) const {
284
b.back()->appendBool( b.fieldName(), true );
290
// Parser action for the literal "false": appends a boolean false under the
// pending field name.
falseValue( ObjectBuilder &_b ) : b( _b ) {}
291
void operator() ( const char *start, const char *end ) const {
292
b.back()->appendBool( b.fieldName(), false );
298
// Parser action for the literal "null": appends a null element under the
// pending field name.
nullValue( ObjectBuilder &_b ) : b( _b ) {}
299
void operator() ( const char *start, const char *end ) const {
300
b.back()->appendNull( b.fieldName() );
306
// Parser action for the namespace string of a DB reference: saves the
// accumulated string in b.ns, where dbrefEnd later reads it.
dbrefNS( ObjectBuilder &_b ) : b( _b ) {}
307
void operator() ( const char *start, const char *end ) const {
308
b.ns = b.popString();
313
// NOTE s must be 24 characters.
314
// Converts hex text into an OID.  Reads s[ 0 ] through s[ 23 ]: each pair of
// hex digits is decoded with hex::val into one of 12 bytes, in order.
OID stringToOid( const char *s ) {
316
// Address the OID as raw bytes so it can be filled one byte at a time.
char *oidP = (char *)( &oid );
317
for ( int i = 0; i < 12; ++i )
318
oidP[ i ] = hex::val( s + ( i * 2 ) );
323
// Parser action for the 24 hex digits matched by quotedOid: decodes them with
// stringToOid and saves the result in b.oid.
oidValue( ObjectBuilder &_b ) : b( _b ) {}
324
void operator() ( const char *start, const char *end ) const {
325
b.oid = stringToOid( start );
331
// Parser action run when a complete DB reference has matched: appends a DBRef
// built from the saved namespace (b.ns) and OID (b.oid).
dbrefEnd( ObjectBuilder &_b ) : b( _b ) {}
332
void operator() ( const char *start, const char *end ) const {
333
b.back()->appendDBRef( b.fieldName(), b.ns.c_str(), b.oid );
339
// Parser action run when a complete OID value has matched: appends the saved
// b.oid under the pending field name.
oidEnd( ObjectBuilder &_b ) : b( _b ) {}
340
void operator() ( const char *start, const char *end ) const {
341
b.back()->appendOID( b.fieldName(), &b.oid );
346
// Parser action for the base64 payload of a "$binary" value: checks the
// payload length, then stores the decoded bytes in b.binData.
struct binDataBinary {
347
binDataBinary( ObjectBuilder &_b ) : b( _b ) {}
348
void operator() ( const char *start, const char *end ) const {
349
// The matched text must have a length that is a multiple of 4.
massert( 10339 , "Badly formatted bindata", ( end - start ) % 4 == 0 );
350
string encoded( start, end );
351
b.binData = base64::decode( encoded );
357
// Parser action for the two hex digits of the "$type" field: decodes them to
// one byte and stores it as the BinDataType in b.binDataType.
binDataType( ObjectBuilder &_b ) : b( _b ) {}
358
void operator() ( const char *start, const char *end ) const {
359
b.binDataType = BinDataType( hex::val( start ) );
365
// Parser action run when a complete bindata value has matched: appends the
// decoded bytes, with their length and saved subtype, under the pending field
// name.
binDataEnd( ObjectBuilder &_b ) : b( _b ) {}
366
void operator() ( const char *start, const char *end ) const {
367
b.back()->appendBinData( b.fieldName(), b.binData.length(),
368
b.binDataType, b.binData.data() );
374
dateValue( ObjectBuilder &_b ) : b( _b ) {}
375
void operator() ( Date_t v ) const {
382
// Parser action run when a complete date value has matched: appends the saved
// b.date under the pending field name.
dateEnd( ObjectBuilder &_b ) : b( _b ) {}
383
void operator() ( const char *start, const char *end ) const {
384
b.back()->appendDate( b.fieldName(), b.date );
390
// Parser action for a regex pattern: saves the accumulated string in b.regex.
// Used by both regexS (on the "$regex" string) and regexT (on the closing '/').
regexValue( ObjectBuilder &_b ) : b( _b ) {}
391
void operator() ( const char *start, const char *end ) const {
392
b.regex = b.popString();
397
// Parser action for regex flags: stores the matched character range
// [start, end) verbatim in b.regexOptions.
struct regexOptions {
398
regexOptions( ObjectBuilder &_b ) : b( _b ) {}
399
void operator() ( const char *start, const char *end ) const {
400
b.regexOptions = string( start, end );
406
// Parser action run when a complete regex value has matched: appends a regex
// element built from the saved pattern (b.regex) and flags (b.regexOptions).
regexEnd( ObjectBuilder &_b ) : b( _b ) {}
407
void operator() ( const char *start, const char *end ) const {
408
b.back()->appendRegex( b.fieldName(), b.regex.c_str(),
409
b.regexOptions.c_str() );
414
// One gotcha with this parsing library is probably best illustrated with an
415
// example. Say we have a production like this:
416
// z = ( ch_p( 'a' )[ foo ] >> ch_p( 'b' ) ) | ( ch_p( 'a' )[ foo ] >> ch_p( 'c' ) );
417
// On input "ac", action foo() will be called twice -- once as the parser tries
418
// to match "ab", again as the parser successfully matches "ac". Sometimes
419
// the grammar can be modified to eliminate these situations. Here, for example:
420
// z = ch_p( 'a' )[ foo ] >> ( ch_p( 'b' ) | ch_p( 'c' ) );
421
// However, this is not always possible. In my implementation I've tried to
422
// stick to the following pattern: store fields fed to action callbacks
423
// temporarily as ObjectBuilder members, then append to a BSONObjBuilder once
424
// the parser has completely matched a nonterminal and won't backtrack. It's
425
// worth noting here that this parser follows a short-circuit convention. So,
426
// in the original z example on line 3, if the input was "ab", foo() would only
428
// Boost.Spirit grammar for the JSON dialect accepted by fromjson().  The
// functors in square brackets are semantic actions that feed matched input
// into the ObjectBuilder supplied at construction.
struct JsonGrammar : public grammar< JsonGrammar > {
430
JsonGrammar( ObjectBuilder &_b ) : b( _b ) {}
432
template < typename ScannerT >
434
definition( JsonGrammar const &self ) {
435
// object: '{', optional members, '}'.  members: field:value pairs separated by ','.
object = ch_p( '{' )[ objectStart( self.b ) ] >> !members >> '}';
436
members = list_p((fieldName >> ':' >> value) , ',');
438
// A field name may be double-quoted, single-quoted or unquoted.
str[ fieldNameEnd( self.b ) ] |
439
singleQuoteStr[ fieldNameEnd( self.b ) ] |
440
unquotedFieldName[ unquotedFieldNameEnd( self.b ) ];
441
// array: '[', optional elements, ']'.  arrayNext runs on each ',' separator.
array = ch_p( '[' )[ arrayStart( self.b ) ] >> !elements >> ']';
442
elements = list_p(value, ch_p(',')[arrayNext( self.b )]);
444
// Value alternatives, each paired with the action run when it has matched.
oid[ oidEnd( self.b ) ] |
445
dbref[ dbrefEnd( self.b ) ] |
446
bindata[ binDataEnd( self.b ) ] |
447
date[ dateEnd( self.b ) ] |
448
regex[ regexEnd( self.b ) ] |
449
str[ stringEnd( self.b ) ] |
450
singleQuoteStr[ stringEnd( self.b ) ] |
453
object[ subobjectEnd( self.b ) ] |
454
array[ arrayEnd( self.b ) ] |
455
lexeme_d[ str_p( "true" ) ][ trueValue( self.b ) ] |
456
lexeme_d[ str_p( "false" ) ][ falseValue( self.b ) ] |
457
lexeme_d[ str_p( "null" ) ][ nullValue( self.b ) ];
458
// NOTE lexeme_d and rules don't mix well, so we have this mess.
459
// NOTE We use range_p rather than cntrl_p, because the latter is locale dependent.
460
// Double-quoted string: chClear runs on the opening quote, then one action per
// character or escape.
str = lexeme_d[ ch_p( '"' )[ chClear( self.b ) ] >>
463
ch_p( 'b' )[ chE( self.b ) ] |
464
ch_p( 'f' )[ chE( self.b ) ] |
465
ch_p( 'n' )[ chE( self.b ) ] |
466
ch_p( 'r' )[ chE( self.b ) ] |
467
ch_p( 't' )[ chE( self.b ) ] |
468
ch_p( 'v' )[ chE( self.b ) ] |
469
( ch_p( 'u' ) >> ( repeat_p( 4 )[ xdigit_p ][ chU( self.b ) ] ) ) |
470
( ~ch_p('x') & (~range_p('0','9'))[ ch( self.b ) ] ) // hex and octal aren't supported
473
( ~range_p( 0x00, 0x1f ) & ~ch_p( '"' ) & ( ~ch_p( '\\' ) )[ ch( self.b ) ] ) ) >> '"' ];
475
// Single-quoted string: same structure as str with '\'' as the delimiter.
singleQuoteStr = lexeme_d[ ch_p( '\'' )[ chClear( self.b ) ] >>
478
ch_p( 'b' )[ chE( self.b ) ] |
479
ch_p( 'f' )[ chE( self.b ) ] |
480
ch_p( 'n' )[ chE( self.b ) ] |
481
ch_p( 'r' )[ chE( self.b ) ] |
482
ch_p( 't' )[ chE( self.b ) ] |
483
ch_p( 'v' )[ chE( self.b ) ] |
484
( ch_p( 'u' ) >> ( repeat_p( 4 )[ xdigit_p ][ chU( self.b ) ] ) ) |
485
( ~ch_p('x') & (~range_p('0','9'))[ ch( self.b ) ] ) // hex and octal aren't supported
488
( ~range_p( 0x00, 0x1f ) & ~ch_p( '\'' ) & ( ~ch_p( '\\' ) )[ ch( self.b ) ] ) ) >> '\'' ];
490
// real_p accepts numbers with nonsignificant zero prefixes, which
491
// aren't allowed in JSON. Oh well.
492
number = strict_real_p[ numberValue( self.b ) ];
494
// Base-10 long long parser; the digit limit is digits10 + 1 of long long.
static int_parser<long long, 10, 1, numeric_limits<long long>::digits10 + 1> long_long_p;
495
integer = long_long_p[ intValue(self.b) ];
497
// We allow a subset of valid js identifier names here.
498
unquotedFieldName = lexeme_d[ ( alpha_p | ch_p( '$' ) | ch_p( '_' ) ) >> *( ( alnum_p | ch_p( '$' ) | ch_p( '_' )) ) ];
500
// DB reference: either { "$ref" : <str>, "$id" : <quoted oid> } or the
// Dbref( ... ) form.
dbref = dbrefS | dbrefT;
501
dbrefS = ch_p( '{' ) >> "\"$ref\"" >> ':' >>
502
str[ dbrefNS( self.b ) ] >> ',' >> "\"$id\"" >> ':' >> quotedOid >> '}';
503
dbrefT = str_p( "Dbref" ) >> '(' >> str[ dbrefNS( self.b ) ] >> ',' >>
507
// OID: either { "$oid" : <quoted oid> } or ObjectId( <quoted oid> ).
oidS = ch_p( '{' ) >> "\"$oid\"" >> ':' >> quotedOid >> '}';
508
oidT = str_p( "ObjectId" ) >> '(' >> quotedOid >> ')';
510
// Exactly 24 hex digits inside double quotes.
quotedOid = lexeme_d[ '"' >> ( repeat_p( 24 )[ xdigit_p ] )[ oidValue( self.b ) ] >> '"' ];
512
// Binary data: a quoted base64 payload (with optional '=' padding) followed by
// a quoted two-hex-digit "$type".
bindata = ch_p( '{' ) >> "\"$binary\"" >> ':' >>
513
lexeme_d[ '"' >> ( *( range_p( 'A', 'Z' ) | range_p( 'a', 'z' ) | range_p( '0', '9' ) | ch_p( '+' ) | ch_p( '/' ) ) >> *ch_p( '=' ) )[ binDataBinary( self.b ) ] >> '"' ] >> ',' >> "\"$type\"" >> ':' >>
514
lexeme_d[ '"' >> ( repeat_p( 2 )[ xdigit_p ] )[ binDataType( self.b ) ] >> '"' ] >> '}';
516
// TODO: this will need to use a signed parser at some point
517
// Date: either { "$date" : <unsigned> } or Date( <unsigned> ) with an optional
// leading "new".
date = dateS | dateT;
518
dateS = ch_p( '{' ) >> "\"$date\"" >> ':' >> uint_parser< Date_t >()[ dateValue( self.b ) ] >> '}';
519
dateT = !str_p("new") >> str_p( "Date" ) >> '(' >> uint_parser< Date_t >()[ dateValue( self.b ) ] >> ')';
521
// Regex: either { "$regex" : <str>, "$options" : "<letters>" } or the
// /pattern/flags form, where flags are any of i, g, m.
regex = regexS | regexT;
522
regexS = ch_p( '{' ) >> "\"$regex\"" >> ':' >> str[ regexValue( self.b ) ] >> ',' >> "\"$options\"" >> ':' >> lexeme_d[ '"' >> ( *( alpha_p ) )[ regexOptions( self.b ) ] >> '"' ] >> '}';
523
// FIXME Obviously it would be nice to unify this with str.
524
regexT = lexeme_d[ ch_p( '/' )[ chClear( self.b ) ] >>
526
( ch_p( '"' )[ chE( self.b ) ] |
527
ch_p( '\\' )[ chE( self.b ) ] |
528
ch_p( '/' )[ chE( self.b ) ] |
529
ch_p( 'b' )[ chE( self.b ) ] |
530
ch_p( 'f' )[ chE( self.b ) ] |
531
ch_p( 'n' )[ chE( self.b ) ] |
532
ch_p( 'r' )[ chE( self.b ) ] |
533
ch_p( 't' )[ chE( self.b ) ] |
534
( ch_p( 'u' ) >> ( repeat_p( 4 )[ xdigit_p ][ chU( self.b ) ] ) ) ) ) |
535
( ~range_p( 0x00, 0x1f ) & ~ch_p( '/' ) & ( ~ch_p( '\\' ) )[ ch( self.b ) ] ) ) >> str_p( "/" )[ regexValue( self.b ) ]
536
>> ( *( ch_p( 'i' ) | ch_p( 'g' ) | ch_p( 'm' ) ) )[ regexOptions( self.b ) ] ];
538
// One rule object per nonterminal used above.
rule< ScannerT > object, members, array, elements, value, str, number, integer,
539
dbref, dbrefS, dbrefT, oid, oidS, oidT, bindata, date, dateS, dateT,
540
regex, regexS, regexT, quotedOid, fieldName, unquotedFieldName, singleQuoteStr;
541
// Entry point queried by the parsing framework for the top-level rule.
const rule< ScannerT > &start() const {
548
// Parses JSON text into a BSONObj using JsonGrammar, with space_p as the
// skipper between tokens.  If the grammar does not consume the whole input,
// assertion 10340 is raised with a message quoting the text at the position
// where parsing stopped.
BSONObj fromjson( const char *str ) {
552
JsonGrammar parser( b );
553
parse_info<> result = parse( str, parser, space_p );
554
// result.full is false when some input was left unparsed.
if ( !result.full ) {
555
int len = strlen( result.stop );
559
ss << "Failure parsing JSON string near: " << string( result.stop, len );
560
massert( 10340 , ss.str(), false );
565
// Convenience overload: forwards str.c_str() to fromjson( const char * ).
BSONObj fromjson( const string &str ) {
566
return fromjson( str.c_str() );