4
* Copyright (C) 2012 10gen Inc.
6
* This program is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Affero General Public License, version 3,
8
* as published by the Free Software Foundation.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU Affero General Public License for more details.
15
* You should have received a copy of the GNU Affero General Public License
16
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19
#include "mongo/pch.h"
21
#include "mongo/db/fts/fts_query.h"
22
#include "mongo/db/fts/tokenizer.h"
23
#include "mongo/util/mongoutils/str.h"
24
#include "mongo/util/stringutils.h"
30
using namespace mongoutils;
32
// Parses the text-search string `query`: the string is tokenized, TEXT tokens are
// handed to _addTerm, and DELIMITER tokens drive the negation / phrase state below.
// Extracted phrases are stored lower-cased in _phrases or _negatedPhrases.
//
// query    - the raw search string; phrases are cut directly out of this string.
// language - selects both the stop-word list and the stemmer.
Status FTSQuery::parse(const string& query, const string& language) {
36
const StopWords* stopWords = StopWords::getStopWords( language );
37
Stemmer stemmer( language );
39
// Parser state carried across tokens.
bool inNegation = false;
40
bool inPhrase = false;
42
// Offset saved from a quote delimiter token (see the assignment from t.offset below).
unsigned quoteOffset = 0;
44
// NOTE(review): the tokenizer is built from the member _language, while the stop words
// and stemmer above use the `language` parameter - confirm _language is assigned from
// `language` before this point.
Tokenizer i( _language, query );
48
if ( t.type == Token::TEXT ) {
49
string s = t.data.toString();
51
if ( inPhrase && inNegation ) {
55
// Pass the raw token text on together with the current negation state;
// lower-casing, stop-word filtering and stemming happen inside _addTerm.
_addTerm( stopWords, stemmer, s, inNegation );
58
if ( inNegation && !inPhrase )
61
else if ( t.type == Token::DELIMITER ) {
64
if ( t.previousWhiteSpace )
67
else if ( c == '"' ) {
70
// The phrase text starts one character past the saved quote offset
// (presumably the offset of the opening quote - TODO confirm).
unsigned phraseStart = quoteOffset + 1;
71
// NOTE(review): unsigned subtraction; this would wrap if t.offset < phraseStart.
unsigned phraseLength = t.offset - phraseStart;
72
// The phrase is sliced out of the original query string, not rebuilt from tokens.
StringData phrase = StringData( query ).substr( phraseStart,
75
_negatedPhrases.push_back( tolowerString( phrase ) );
77
_phrases.push_back( tolowerString( phrase ) );
84
quoteOffset = t.offset;
96
// Normalizes a single search term and records it on this query.
//
// sw      - stop-word list consulted for the lower-cased term.
// stemmer - used to reduce the term to its stem.
// term    - raw token text.
// negated - presumably selects _negatedTerms over _terms; the branch itself is
//           not visible here - TODO confirm.
void FTSQuery::_addTerm( const StopWords* sw, Stemmer& stemmer, const string& term, bool negated ) {
97
string word = tolowerString( term );
98
// The stop-word check runs on the lower-cased, not yet stemmed word.
if ( sw->isStopWord( word ) )
100
word = stemmer.stem( word );
102
// Negated terms go in through insert(), regular terms through push_back().
_negatedTerms.insert( word );
104
_terms.push_back( word );
108
// Debug-output helper for a set of strings: walks `s` in its iteration order.
// Presumably each element is written to `ss` with `sep` between elements - the loop
// body is not visible here, TODO confirm.
void _debugHelp( stringstream& ss, const set<string>& s, const string& sep ) {
110
for ( set<string>::const_iterator i = s.begin(); i != s.end(); ++i ) {
119
// Vector overload of the debug-output helper: copies the elements of `v` into a
// set<string> and forwards to the set overload with the same `ss` and `sep`.
void _debugHelp( stringstream& ss, const vector<string>& v, const string& sep ) {
120
// Building a set from the range drops duplicates and orders the elements.
set<string> s( v.begin(), v.end() );
121
_debugHelp( ss, s, sep );
124
// unordered_set overload of the debug-output helper: copies the elements of `v` into
// a set<string> and forwards to the set overload with the same `ss` and `sep`.
void _debugHelp( stringstream& ss, const unordered_set<string>& v, const string& sep ) {
125
// Going through an ordered set gives the output a stable element order.
set<string> s( v.begin(), v.end() );
126
_debugHelp( ss, s, sep );
131
// Builds a description of this query by streaming four lists into `ss` through
// _debugHelp, each with a comma-and-space separator: terms, negated terms, phrases,
// and negated phrases, in that order.
// NOTE(review): the declaration of `ss`, the remaining labels and the return
// statement are not visible here.
string FTSQuery::toString() const {
136
_debugHelp( ss, getTerms(), ", " );
139
ss << " negated terms: ";
140
_debugHelp( ss, getNegatedTerms(), ", " );
144
_debugHelp( ss, getPhr(), ", " );
147
ss << " negated phrases: ";
148
_debugHelp( ss, getNegatedPhr(), ", " );
154
// Streams the same four lists as toString() (terms, negated terms, phrases, negated
// phrases, in that order) through _debugHelp, but with a pipe character as the
// element separator.
// NOTE(review): the declaration of `ss`, whatever is written between the lists, and
// the return statement are not visible here.
string FTSQuery::debugString() const {
157
_debugHelp( ss, getTerms(), "|" );
160
_debugHelp( ss, getNegatedTerms(), "|" );
163
_debugHelp( ss, getPhr(), "|" );
166
_debugHelp( ss, getNegatedPhr(), "|" );