3
/* Matcher is our boolean expression evaluator for "where" clauses */
6
* Copyright (C) 2008 10gen Inc.
8
* This program is free software: you can redistribute it and/or modify
9
* it under the terms of the GNU Affero General Public License, version 3,
10
* as published by the Free Software Foundation.
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
* GNU Affero General Public License for more details.
17
* You should have received a copy of the GNU Affero General Public License
18
* along with this program. If not, see <http://www.gnu.org/licenses/>.
23
#include "../util/goodies.h"
24
#include "../util/unittest.h"
26
#include "../scripting/engine.h"
32
//#include "minilex.h"
44
scope->execSetup( "_mongo.readOnly = false;" , "make not read only" );
53
auto_ptr<Scope> scope;
54
ScriptingFunction func;
// Create the script function for this $where clause from the source text `code`.
// Asserts (massert 10341) that `scope` has already been set, then stores the
// handle returned by scope->createFunction() in `func`.
57
void setFunc(const char *code) {
58
massert( 10341 , "scope has to be created first!" , scope.get() );
59
func = scope->createFunction( code );
// Build an element matcher for query element `_e` under comparison operator `_op`.
// `_e` is kept in toMatch and `_op` in compareOp; the branches below precompute
// extra state for the operators that need it (opMOD, opTYPE, opELEM_MATCH).
69
ElementMatcher::ElementMatcher( BSONElement _e , int _op ) : toMatch( _e ) , compareOp( _op ) {
70
if ( _op == BSONObj::opMOD ){
// The operand is the first element of _e's embedded object, itself an object
// whose fields "0" and "1" are read as ints.
71
BSONObj o = _e.embeddedObject().firstElement().embeddedObject();
// mod is the divisor and modm the expected remainder
// (valuesMatch tests l % mod == modm).
72
mod = o["0"].numberInt();
73
modm = o["1"].numberInt();
// A zero divisor is rejected as a user error.
75
uassert( 10073 , "mod can't be 0" , mod );
77
else if ( _op == BSONObj::opTYPE ){
// The numeric operand is cast to the BSONType to compare against.
78
type = (BSONType)(_e.embeddedObject().firstElement().numberInt());
80
else if ( _op == BSONObj::opELEM_MATCH ){
// The operand must be an Object; it becomes a nested Matcher held in subMatcher.
81
BSONElement m = toMatch.embeddedObjectUserCheck().firstElement();
82
uassert( 12517 , "$elemMatch needs an Object" , m.type() == Object );
83
subMatcher.reset( new Matcher( m.embeddedObject() ) );
88
ElementMatcher::~ElementMatcher(){
// Translate a regex flag string into pcrecpp::RE_Options.
// UTF-8 is always enabled. A null or empty `flags` skips the loop, so only the
// UTF-8 option is set in that case.
98
inline pcrecpp::RE_Options flags2options(const char* flags){
99
pcrecpp::RE_Options options;
100
options.set_utf8(true);
// The null check makes a NULL flags pointer safe.
101
while ( flags && *flags ) {
// NOTE(review): the condition guarding set_caseless is not visible in this
// excerpt - presumably *flags == 'i'; confirm against the full file.
103
options.set_caseless(true);
// 'm' turns on multiline matching.
104
else if ( *flags == 'm' )
105
options.set_multiline(true);
// 'x' turns on extended syntax.
106
else if ( *flags == 'x' )
107
options.set_extended(true);
116
CoveredIndexMatcher::CoveredIndexMatcher(const BSONObj &jsobj, const BSONObj &indexKeyPattern) :
117
_keyMatcher(jsobj.filterFieldsUndotted(indexKeyPattern, true),
122
_docMatcher.keyMatch() &&
123
_keyMatcher.jsobj.nFields() == _docMatcher.jsobj.nFields()
// Decide whether an index entry satisfies the query.
// key    - the index key for the entry
// recLoc - location of the full record, dereferenced only on the last visible step
// NOTE(review): the bodies of the early-exit branches are elided in this excerpt.
127
bool CoveredIndexMatcher::matches(const BSONObj &key, const DiskLoc &recLoc ) {
// Test the key first, but only when the key matcher reports keyMatch().
128
if ( _keyMatcher.keyMatch() ) {
129
if ( !_keyMatcher.matches(key) ) {
// Branch taken when the record itself is not needed (body elided).
134
if ( ! _needRecord ){
// Otherwise match the full record at recLoc with the document matcher.
138
return _docMatcher.matches(recLoc.rec());
142
/* _jsobj - the query pattern
// Build the matcher state from the query pattern `_jsobj`.
// Each top-level element `e` of the pattern is classified by the visible branches:
//   - "$where" with Code / CodeWScope / String value -> script function
//   - RegEx literal                                  -> entry in regexs[]
//   - Object whose fields are $-operators            -> per-operator handling
//   - Array, "$atomic", and plain values             -> see the tail of the loop
// `constrainIndexKey` is saved in constrainIndexKey_ at the end.
// NOTE(review): loop heads, several case labels and closing braces are elided in
// this excerpt; comments below describe only the statements that are visible.
144
Matcher::Matcher(const BSONObj &_jsobj, const BSONObj &constrainIndexKey) :
145
where(0), jsobj(_jsobj), haveSize(), all(), hasArray(0), _atomic(false), nRegex(0) {
147
BSONObjIterator i(jsobj);
149
BSONElement e = i.next();
151
if ( ( e.type() == CodeWScope || e.type() == Code || e.type() == String ) && strcmp(e.fieldName(), "$where")==0 ) {
152
// $where: function()...
// Only one $where is accepted, and a script engine must be available.
153
uassert( 10066 , "$where occurs twice?", where == 0 );
154
uassert( 10067 , "$where query, but no script engine", globalScriptEngine );
// Take a scope from the engine's pool for cc().ns() and connect it to the
// database named by cc().database()->name.
156
where->scope = globalScriptEngine->getPooledScope( cc().ns() );
157
where->scope->localConnect( cc().database()->name.c_str() );
// CodeWScope carries both the function source and scope data.
159
if ( e.type() == CodeWScope ) {
160
where->setFunc( e.codeWScopeCode() );
// Allocated with raw new; the matching cleanup is not visible in this excerpt.
161
where->jsScope = new BSONObj( e.codeWScopeScopeData() , 0 );
// Otherwise the element's string value is used as the function source.
164
const char *code = e.valuestr();
165
where->setFunc(code);
// Run the setup snippet that sets _mongo.readOnly to true on the scope.
168
where->scope->execSetup( "_mongo.readOnly = true;" , "make read only" );
// A regex literal: compile it with pcrecpp and record the field it applies to
// in the next regexs[] slot.
173
if ( e.type() == RegEx ) {
// Message for exceeding the regex limit (the guarding check is elided).
175
out() << "ERROR: too many regexes in query" << endl;
178
RegexMatcher& rm = regexs[nRegex];
179
rm.re = new pcrecpp::RE(e.regex(), flags2options(e.regexFlags()));
180
rm.fieldName = e.fieldName();
186
// greater than / less than...
187
// e.g., e == { a : { $gt : 3 } }
189
// { a : { $in : [1,2,3] } }
190
if ( e.type() == Object ) {
191
// support {$regex:"a|b", $options:"imx"}
// Filled in by the opREGEX / opOPTIONS cases below.
192
const char* regex = NULL;
193
const char* flags = "";
195
// e.g., fe == { $gt : 3 }
196
BSONObjIterator j(e.embeddedObject());
197
bool isOperator = false;
199
BSONElement fe = j.next();
200
const char *fn = fe.fieldName();
// Field names starting with '$' and at least one more character are operators.
202
if ( fn[0] == '$' && fn[1] ) {
// -1 is passed as the fallback and is treated as "unknown operator" below.
203
int op = fe.getGtLtOp( -1 );
// Exact match on the name "$ref".
206
if ( fn[1] == 'r' && fn[2] == 'e' && fn[3] == 'f' && fn[4] == 0 ){
207
break; // { $ref : xxx } - treat as normal object
209
uassert( 10068 , (string)"invalid operator: " + fn , op != -1 );
// Copy fe into a fresh builder under the outer field name, so the basic matcher
// is keyed by e's field rather than the operator name. The builder is retained
// in _builders - presumably to keep the built element's storage alive; confirm.
219
shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
220
_builders.push_back( b );
221
b->appendAs(fe, e.fieldName());
222
addBasic(b->done().firstElement(), op);
// Same re-keying, registered with BSONObj::NE (case label elided).
227
shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
228
_builders.push_back( b );
229
b->appendAs(fe, e.fieldName());
230
addBasic(b->done().firstElement(), BSONObj::NE);
// Operators whose operand is an embedded object/array, passed to the three-argument
// ElementMatcher constructor (case labels elided).
237
basics.push_back( ElementMatcher( e , op , fe.embeddedObject() ) );
240
case BSONObj::opTYPE:
241
case BSONObj::opELEM_MATCH:
242
// these are types where ElementMatcher has all the info
243
basics.push_back( ElementMatcher( e , op ) );
// $size: re-key under the outer field name and register as opSIZE.
245
case BSONObj::opSIZE:{
246
shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
247
_builders.push_back( b );
248
b->appendAs(fe, e.fieldName());
249
addBasic(b->done().firstElement(), BSONObj::opSIZE);
// $exists: re-key under the outer field name and register as opEXISTS.
253
case BSONObj::opEXISTS:{
254
shared_ptr< BSONObjBuilder > b( new BSONObjBuilder() );
255
_builders.push_back( b );
256
b->appendAs(fe, e.fieldName());
257
addBasic(b->done().firstElement(), BSONObj::opEXISTS);
// $regex / $options only record their string values; the regex is compiled
// after the operator loop.
260
case BSONObj::opREGEX:{
261
regex = fe.valuestrsafe();
264
case BSONObj::opOPTIONS:{
265
flags = fe.valuestrsafe();
// Always-failing assertion: the operator was recognised by getGtLtOp but has no
// handler here (enclosing label elided).
269
uassert( 10069 , (string)"BUG - can't operator for: " + fn , 0 );
// Regex supplied via $regex/$options: same limit message and slot setup as for
// a regex literal above.
280
out() << "ERROR: too many regexes in query" << endl;
282
RegexMatcher& rm = regexs[nRegex];
283
rm.re = new pcrecpp::RE(regex, flags2options(flags));
284
rm.fieldName = e.fieldName();
// Array-valued query element (body elided).
292
if ( e.type() == Array ){
// "$atomic" is a flag, not a field to match: store its truth value.
295
else if( strcmp(e.fieldName(), "$atomic") == 0 ) {
296
_atomic = e.trueValue();
300
// normal, simple case e.g. { a : "foo" }
301
addBasic(e, BSONObj::Equality);
// Remember the index key pattern used to constrain field lookups.
304
constrainIndexKey_ = constrainIndexKey;
// Compare one document element `l` against the query element `r` under `op`.
// `bm` supplies operator-specific state (myset, mod/modm, type).
// NE and NIN are not handled here; the assert below enforces that callers deal
// with them first.
// NOTE(review): several return statements and closing braces are elided.
307
inline int Matcher::valuesMatch(const BSONElement& l, const BSONElement& r, int op, const ElementMatcher& bm) {
308
assert( op != BSONObj::NE && op != BSONObj::NIN );
// Equality: delegate to the element's own value comparison.
310
if ( op == BSONObj::Equality )
311
return l.valuesEqual(r);
// $in: membership test against the matcher's set.
313
if ( op == BSONObj::opIN ) {
315
return bm.myset->count(l);
// $size: applies to arrays; `count` (declared on an elided line) is compared
// with r's numeric value after iterating the array's elements.
318
if ( op == BSONObj::opSIZE ) {
319
if ( l.type() != Array )
322
BSONObjIterator i( l.embeddedObject() );
323
while( i.moreWithEOO() ) {
324
BSONElement e = i.next();
329
return count == r.number();
// $mod: numeric elements only; true when l % mod equals modm.
332
if ( op == BSONObj::opMOD ){
333
if ( ! l.isNumber() )
336
return l.numberLong() % bm.mod == bm.modm;
// $type: compare the element's BSON type with the requested one.
339
if ( op == BSONObj::opTYPE ){
340
return bm.type == l.type();
343
/* check LT, GTE, ... */
// Ordering comparisons start with a canonical-type check (result of a mismatch
// is elided).
344
if ( l.canonicalType() != r.canonicalType() )
346
int c = compareElementValues(l, r);
// Clamp results below -1 to -1.
347
if ( c < -1 ) c = -1;
// Evaluate "not equal" by running the Equality match and inverting the result.
// When the matcher's own value is not null, an equality result of mismatch or
// missing (<= 0) yields 1 and an equality match yields 0.
// NOTE(review): the return used when bm.toMatch is null is elided.
353
int Matcher::matchesNe(const char *fieldName, const BSONElement &toMatch, const BSONObj &obj, const ElementMatcher& bm ) {
354
int ret = matchesDotted( fieldName, toMatch, obj, BSONObj::Equality, bm );
355
if ( bm.toMatch.type() != jstNULL )
356
return ( ret <= 0 ) ? 1 : 0;
// Result to report when the field being matched is absent from the document.
// For $exists the answer depends on the requested boolean: -1 when toMatch is
// true, 1 when it is false.
// NOTE(review): the value returned for operators other than $exists is elided.
361
int retMissing( const ElementMatcher &bm ) {
362
if ( bm.compareOp != BSONObj::opEXISTS )
364
return bm.toMatch.boolean() ? -1 : 1;
367
/* Check if a particular field matches.
369
fieldName - field to match "a.b" if we are reaching into an embedded object.
370
toMatch - element we want to match.
371
obj - database object to check against
372
compareOp - Equality, LT, GT, etc.
377
{ "a.b" : 3 } means obj.a.b == 3
378
{ a : { $lt : 3 } } means obj.a < 3
379
{ a : { $in : [1,2] } } means [1,2].contains(obj.a)
// Match one (possibly dotted) field of `obj` against `toMatch` under `compareOp`.
// Result convention, as used by Matcher::matches: -1 = mismatch, 0 = missing
// element, 1 = match.
// NOTE(review): many branch heads, returns and declarations (e.g. of `e`, `b`,
// `found`) are elided in this excerpt; comments cover only visible statements.
386
int Matcher::matchesDotted(const char *fieldName, const BSONElement& toMatch, const BSONObj& obj, int compareOp, const ElementMatcher& bm , bool isArr) {
// $all: every element of bm.myset must be present among the field's values.
388
if ( compareOp == BSONObj::opALL ) {
389
if ( bm.myset->size() == 0 )
390
return -1; // is this desired?
// Collect the document's values for fieldName via IndexSpec::getKeys.
391
BSONObjSetDefaultOrder actualKeys;
392
IndexSpec( BSON( fieldName << 1 ) ).getKeys( obj, actualKeys );
393
if ( actualKeys.size() == 0 )
395
for( set< BSONElement, element_lt >::const_iterator i = bm.myset->begin(); i != bm.myset->end(); ++i ) {
397
if ( i->type() == jstNULL )
399
// parallel traversal would be faster worst case I guess
// Re-key the required element with an empty field name before the lookup in
// actualKeys.
401
b.appendAs( *i, "" );
402
if ( !actualKeys.count( b.done() ) )
// $ne is delegated to matchesNe.
408
if ( compareOp == BSONObj::NE )
409
return matchesNe( fieldName, toMatch, obj, bm );
// $nin: run matchesNe once per element of the set.
410
if ( compareOp == BSONObj::NIN ) {
411
for( set<BSONElement,element_lt>::const_iterator i = bm.myset->begin(); i != bm.myset->end(); ++i ) {
412
int ret = matchesNe( fieldName, *i, obj, bm );
// With a non-empty constrainIndexKey_, the field is looked up by index names.
420
bool indexed = !constrainIndexKey_.isEmpty();
422
e = obj.getFieldUsingIndexNames(fieldName, constrainIndexKey_);
// Iterate obj's own elements and recurse into each embedded Object with the
// same fieldName - presumably the isArr branch; its condition is elided.
426
BSONObjIterator ai(obj);
428
while ( ai.moreWithEOO() ) {
429
BSONElement z = ai.next();
430
if ( z.type() == Object ) {
431
BSONObj eo = z.embeddedObject();
432
int cmp = matchesDotted(fieldName, toMatch, eo, compareOp, bm, false);
435
} else if ( cmp < 0 ) {
440
return found ? -1 : retMissing( bm );
// Dotted name: split at the first '.', fetch the left-hand field, and recurse on
// the remainder inside it.
442
const char *p = strchr(fieldName, '.');
444
string left(fieldName, p-fieldName);
446
BSONElement se = obj.getField(left.c_str());
448
return retMissing( bm );
// Only an Object or Array can be descended into.
449
if ( se.type() != Object && se.type() != Array )
450
return retMissing( bm );
452
BSONObj eo = se.embeddedObject();
453
return matchesDotted(p+1, toMatch, eo, compareOp, bm, se.type() == Array);
// Undotted name: direct field lookup.
455
e = obj.getField(fieldName);
// $exists: 1 when exactly one of "field is missing" and "toMatch is true" holds,
// i.e. the field's presence agrees with the requested boolean; otherwise -1.
459
if ( compareOp == BSONObj::opEXISTS ) {
460
return ( e.eoo() ^ toMatch.boolean() ) ? 1 : -1;
// Compare the element as a whole when it is not an array, when matching against
// an index key, or for $size.
461
} else if ( ( e.type() != Array || indexed || compareOp == BSONObj::opSIZE ) &&
462
valuesMatch(e, toMatch, compareOp, bm ) ) {
// Otherwise, for arrays, test the array's elements one by one.
464
} else if ( e.type() == Array && compareOp != BSONObj::opSIZE ) {
466
BSONObjIterator ai(e.embeddedObject());
468
while ( ai.moreWithEOO() ) {
469
BSONElement z = ai.next();
// $elemMatch applies the nested matcher to Object elements only.
471
if ( compareOp == BSONObj::opELEM_MATCH ){
473
if ( z.type() == Object && bm.subMatcher->matches( z.embeddedObject() ) )
477
if ( valuesMatch( z, toMatch, compareOp, bm) ) {
484
if ( compareOp == BSONObj::Equality && e.woCompare( toMatch ) == 0 ){
485
// match an entire array to itself
490
else if ( e.eoo() ) {
491
// 0 indicates "missing element"
// Apply the compiled regex in `rm` to element `e` using PartialMatch.
// The visible branches pick the text to match by element type: String/Symbol,
// numbers (formatted with "%f"), and Date (milliseconds divided by 1000, then
// rendered with time_t_to_String).
// NOTE(review): the declarations of `buf`, `p` and `d` are elided. sprintf with
// "%f" can emit several hundred characters for large doubles - confirm `buf` is
// sized for that.
499
inline bool regexMatches(RegexMatcher& rm, const BSONElement& e) {
502
if ( e.type() == String || e.type() == Symbol )
504
else if ( e.isNumber() ) {
505
sprintf(buf, "%f", e.number());
507
else if ( e.type() == Date ) {
// Convert milliseconds to whole seconds.
509
time_t t = (d.millis/1000);
510
time_t_to_String(t, buf);
514
return rm.re->PartialMatch(p);
517
/* See if an object matches the query.
// Test whether document `jsobj` satisfies this matcher.
// Visible phases, in order: the basic element matchers, the regex matchers, and
// finally the $where script (the guard around that phase is elided).
// NOTE(review): the early returns between the phases are elided in this excerpt.
519
bool Matcher::matches(const BSONObj& jsobj ) {
520
/* assuming there is usually only one thing to match. if more this
521
could be slow sometimes. */
523
// check normal non-regex cases:
524
for ( unsigned i = 0; i < basics.size(); i++ ) {
525
ElementMatcher& bm = basics[i];
526
BSONElement& m = bm.toMatch;
527
// -1=mismatch. 0=missing element. 1=match
528
int cmp = matchesDotted(m.fieldName(), m, jsobj, bm.compareOp, bm );
532
/* missing is ok iff we were looking for null */
533
if ( m.type() == jstNULL || m.type() == Undefined ) {
534
if ( bm.compareOp == BSONObj::NE ) {
// Regex phase: one pass per compiled regex.
543
for ( int r = 0; r < nRegex; r++ ) {
544
RegexMatcher& rm = regexs[r];
// When constrained to an index key, the field is fetched by index name.
546
if ( !constrainIndexKey_.isEmpty() ) {
547
BSONElement e = jsobj.getFieldUsingIndexNames(rm.fieldName, constrainIndexKey_);
// Otherwise gather every element reachable through the dotted field name into
// `s` (declared on an elided line) and test each one.
551
jsobj.getFieldsDotted( rm.fieldName, s );
554
for( BSONElementSet::const_iterator i = s.begin(); i != s.end(); ++i )
555
if ( regexMatches(rm, *i) )
// $where phase: a zero function handle is reported as a compile error.
562
if ( where->func == 0 ) {
563
uassert( 10070 , "$where compile error", false);
564
return false; // didn't compile
// Initialise the scope from the CodeWScope data when there is any.
567
if ( where->jsScope ){
568
where->scope->init( where->jsScope );
// Expose the document to the script as `this` and as "obj". The const_casts
// only satisfy the scope API's non-const parameters.
570
where->scope->setThis( const_cast< BSONObj * >( &jsobj ) );
571
where->scope->setObject( "obj", const_cast< BSONObj & >( jsobj ) );
572
where->scope->setBoolean( "fullObject" , true ); // this is a hack b/c fullObject used to be relevant
// Invoke with no arguments; 1000 * 60 is presumably a timeout in milliseconds -
// confirm against Scope::invoke.
574
int err = where->scope->invoke( where->func , BSONObj() , 1000 * 60 , false );
// Clear `this` on the scope once the call has returned.
575
where->scope->setThis( 0 );
576
if ( err == -3 ) { // INVOKE_ERROR
578
ss << "error on invocation of $where function:\n"
579
<< where->scope->getError();
580
uassert( 10071 , ss.str(), false);
582
} else if ( err != 0 ) { // ! INVOKE_SUCCESS
583
uassert( 10072 , "unknown error in invocation of $where function", false);
// The script's result is read back from the scope variable "return".
586
return where->scope->getBoolean( "return" ) != 0;
598
totsize=sizeof(JSObj2);
600
strcpy_s(sname, 7, "abcdef");
602
strcpy_s(sval, 10, "123456789");
613
struct JSUnitTest : public UnitTest {
616
BSONObj j1((const char *) &js1);
617
BSONObj j2((const char *) &js2);
619
assert( m.matches(j1) );
621
assert( !m.matches(j1) );
623
assert( n.matches(j1) );
624
assert( !n.matches(j2) );
626
BSONObj j0 = BSONObj();
627
// BSONObj j0((const char *) &js0);
629
assert( p.matches(j1) );
630
assert( p.matches(j2) );
636
struct RXTest : public UnitTest {
643
static const boost::regex e("(\\d{4}[- ]){3}\\d{4}");
644
static const boost::regex b(".....");
645
out() << "regex result: " << regex_match("hello", e) << endl;
646
out() << "regex result: " << regex_match("abcoo", b) << endl;
651
pcre_config( PCRE_CONFIG_UTF8 , &ret );
652
massert( 10342 , "pcre not compiled with utf8 support" , ret );
654
pcrecpp::RE re1(")({a}h.*o");
655
pcrecpp::RE re("h.llo");
656
assert( re.FullMatch("hello") );
657
assert( !re1.FullMatch("hello") );
660
pcrecpp::RE_Options options;
661
options.set_utf8(true);
662
pcrecpp::RE part("dwi", options);
663
assert( part.PartialMatch("dwight") );
665
pcre_config( PCRE_CONFIG_UNICODE_PROPERTIES , &ret );
667
cout << "warning: some regex utf8 things will not work. pcre build doesn't have --enable-unicode-properties" << endl;