15
15
* You should have received a copy of the GNU Affero General Public License
16
16
* along with this program. If not, see <http://www.gnu.org/licenses/>.
18
* As a special exception, the copyright holders give permission to link the
19
* code of portions of this program with the OpenSSL library under certain
20
* conditions as described in each individual source file and distribute
21
* linked combinations including the program with the OpenSSL library. You
22
* must comply with the GNU Affero General Public License in all respects for
23
* all of the code used other than as permitted herein. If you modify file(s)
24
* with this exception, you may extend this exception to your version of the
25
* file(s), but you are not obligated to do so. If you do not wish to do so,
26
* delete this exception statement from your version. If you delete this
27
* exception statement from all source files in the program, then also delete
28
* it in the license file.
19
31
#include "mongo/pch.h"
21
33
#include "mongo/db/fts/fts_matcher.h"
34
#include "mongo/db/fts/fts_element_iterator.h"
22
35
#include "mongo/platform/strcasestr.h"
29
41
FTSMatcher::FTSMatcher( const FTSQuery& query, const FTSSpec& spec )
32
_stemmer( query.getLanguage() ){
41
52
// flagged for exclusion, i.e. "hello -world" we want to remove all
42
53
// results that include "world"
44
if ( _query.getNegatedTerms().size() == 0 )
55
if ( _query.getNegatedTerms().size() == 0 ) {
47
if ( _spec.wildcard() ) {
48
return _hasNegativeTerm_recurse(obj);
51
/* otherwise look at fields where weights are defined */
52
for ( Weights::const_iterator i = _spec.weights().begin();
53
i != _spec.weights().end();
55
const char * leftOverName = i->first.c_str();
56
BSONElement e = obj.getFieldDottedOrArray(leftOverName);
58
if ( e.type() == Array ) {
59
BSONObjIterator j( e.Obj() );
61
BSONElement x = j.next();
62
if ( leftOverName[0] && x.isABSONObj() )
63
x = x.Obj().getFieldDotted( leftOverName );
64
if ( x.type() == String )
65
if ( _hasNegativeTerm_string( x.String() ) )
69
else if ( e.type() == String ) {
70
if ( _hasNegativeTerm_string( e.String() ) )
77
bool FTSMatcher::_hasNegativeTerm_recurse(const BSONObj& obj ) const {
78
BSONObjIterator j( obj );
80
BSONElement x = j.next();
82
if ( _spec.languageOverrideField() == x.fieldName())
85
if (x.type() == String) {
86
if ( _hasNegativeTerm_string( x.String() ) )
89
else if ( x.isABSONObj() ) {
90
BSONObjIterator k( x.Obj() );
92
// check whether k.next() is an obj/array or not
93
BSONElement y = k.next();
94
if ( y.type() == String ) {
95
if ( _hasNegativeTerm_string( y.String() ) )
98
else if ( y.isABSONObj() ) {
99
if ( _hasNegativeTerm_recurse( y.Obj() ) )
59
FTSElementIterator it( _spec, obj);
62
FTSIteratorValue val = it.next();
63
if (_hasNegativeTerm_string( val._language, val._text )) {
109
72
* Checks if any of the negTerms is in the tokenized string
110
73
* @param raw, the raw string to be tokenized
112
bool FTSMatcher::_hasNegativeTerm_string( const string& raw ) const {
75
bool FTSMatcher::_hasNegativeTerm_string( const FTSLanguage* language,
76
const string& raw ) const {
114
Tokenizer i( _query.getLanguage(), raw );
78
Tokenizer i( *language, raw );
79
Stemmer stemmer( *language );
115
80
while ( i.more() ) {
116
81
Token t = i.next();
117
82
if ( t.type != Token::TEXT )
119
string word = tolowerString( _stemmer.stem( t.data ) );
84
string word = stemmer.stem( tolowerString( t.data ) );
120
85
if ( _query.getNegatedTerms().count( word ) > 0 )
127
91
bool FTSMatcher::phrasesMatch( const BSONObj& obj ) const {
128
92
for (unsigned i = 0; i < _query.getPhr().size(); i++ ) {
129
93
if ( !phraseMatch( _query.getPhr()[i], obj ) ) {
145
108
* Checks if phrase is exactly matched in obj, returns true if so, false otherwise
146
109
* @param phrase, the string to be matched
147
110
* @param obj, document in the collection to match against
149
112
bool FTSMatcher::phraseMatch( const string& phrase, const BSONObj& obj ) const {
151
if ( _spec.wildcard() ) {
152
// case where everything is indexed (all fields)
153
return _phraseRecurse( phrase, obj );
156
for ( Weights::const_iterator i = _spec.weights().begin();
157
i != _spec.weights().end();
160
// figure out what the indexed field is, i.e. is it "field" or "field.subfield", etc.
161
const char * leftOverName = i->first.c_str();
162
BSONElement e = obj.getFieldDottedOrArray(leftOverName);
164
if ( e.type() == Array ) {
165
BSONObjIterator j( e.Obj() );
167
BSONElement x = j.next();
169
if ( leftOverName[0] && x.isABSONObj() )
170
x = x.Obj().getFieldDotted( leftOverName );
172
if ( x.type() == String )
173
if ( _phraseMatches( phrase, x.String() ) )
177
else if ( e.type() == String ) {
178
if ( _phraseMatches( phrase, e.String() ) )
187
* Recurses over all fields in the obj to match against phrase
188
* @param phrase, string to be matched
189
* @param obj, object to be matched against
191
bool FTSMatcher::_phraseRecurse( const string& phrase, const BSONObj& obj ) const {
192
BSONObjIterator j( obj );
194
BSONElement x = j.next();
196
if ( _spec.languageOverrideField() == x.fieldName() )
199
if ( x.type() == String ) {
200
if ( _phraseMatches( phrase, x.String() ) )
203
else if ( x.isABSONObj() ) {
204
BSONObjIterator k( x.Obj() );
208
BSONElement y = k.next();
210
if ( y.type() == mongo::String ) {
211
if ( _phraseMatches( phrase, y.String() ) )
214
else if ( y.isABSONObj() ) {
215
if ( _phraseRecurse( phrase, y.Obj() ) )
113
FTSElementIterator it( _spec, obj);
115
while ( it.more() ) {
116
FTSIteratorValue val = it.next();
117
if (_phraseMatches( phrase, val._text )) {
228
126
* Looks for phrase in a raw string