4
* Copyright (C) 2012 10gen Inc.
6
* This program is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Affero General Public License, version 3,
8
* as published by the Free Software Foundation.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU Affero General Public License for more details.
15
* You should have received a copy of the GNU Affero General Public License
16
* along with this program. If not, see <http://www.gnu.org/licenses/>.
25
#include "mongo/db/fts/fts_util.h"
26
#include "mongo/db/fts/stemmer.h"
27
#include "mongo/db/fts/stop_words.h"
28
#include "mongo/db/fts/tokenizer.h"
29
#include "mongo/platform/unordered_map.h"
35
// Declaration only: the constant itself is defined in another translation unit.
// NOTE(review): presumably the upper bound accepted for a field weight -- confirm at the definition.
extern const double MAX_WEIGHT;
37
// Ordered map from a string key to a double; this is the type handed back by weights().
typedef std::map<string,double> Weights; // TODO cool map
39
// Hash map from a string key to a double; passed by pointer to scoreDocument() and the
// _score* helpers.
// NOTE(review): presumably keyed by term with the accumulated score as value -- confirm in the implementation.
typedef unordered_map<string,double> TermFrequencyMap;
45
Tools( string language )
46
: language( language ){}
47
const std::string& language;
48
const Stemmer* stemmer;
49
const StopWords* stopwords;
53
// Constructs a spec from an index description object.
// NOTE(review): which fields of indexInfo are read is not visible in this header -- see the definition.
FTSSpec( const BSONObj& indexInfo );
55
/** @return the current value of the _wildcard member. */
bool wildcard() const {
    return _wildcard;
}
56
/** @return a const reference to the stored _defaultLanguage string. */
const string& defaultLanguage() const {
    return _defaultLanguage;
}
57
/** @return a const reference to the stored _languageOverrideField string. */
const string& languageOverrideField() const {
    return _languageOverrideField;
}
59
/** @return how many entries _extraBefore currently holds. */
size_t numExtraBefore() const {
    return _extraBefore.size();
}
60
/**
 * @param i index into _extraBefore; it is not range-checked here, so the
 *          caller must keep it below numExtraBefore()
 * @return the i-th entry of _extraBefore
 */
const std::string& extraBefore( unsigned i ) const {
    return _extraBefore[i];
}
62
/** @return how many entries _extraAfter currently holds. */
size_t numExtraAfter() const {
    return _extraAfter.size();
}
63
/**
 * @param i index into _extraAfter; it is not range-checked here, so the
 *          caller must keep it below numExtraAfter()
 * @return the i-th entry of _extraAfter
 */
const std::string& extraAfter( unsigned i ) const {
    return _extraAfter[i];
}
65
// Returns, by value, the language to use for the given document.
// NOTE(review): presumably consults languageOverrideField() in userDoc and falls back to
// defaultLanguage() -- the implementation is not visible here, confirm before relying on it.
string getLanguageToUse( const BSONObj& userDoc ) const;
67
// Scores obj, reporting through the scores map passed by pointer.
// NOTE(review): whether existing entries in *scores are kept or cleared is not visible
// in this header -- confirm in the implementation.
void scoreDocument( const BSONObj& obj, TermFrequencyMap* scores ) const;
70
* given a query, pulls out the pieces (in order) that go in the index first
72
// Result is delivered through out; the returned Status reports the outcome.
// NOTE(review): the state of *out on a non-OK Status is not visible here -- confirm.
Status getIndexPrefix( const BSONObj& filter, BSONObj* out ) const;
74
/** @return a const reference to the _weights map held by this spec. */
const Weights& weights() const {
    return _weights;
}
77
* @param out - untouched if field isn't present
78
* @return true if the field is present, false otherwise
80
// Single-field lookup; the weight is delivered through the out pointer.
// NOTE(review): presumably looks the field up in the weights() map -- confirm in the implementation.
bool weight( const StringData& field, double* out ) const;
83
// Static helper: takes an index spec and returns a BSONObj by value.
// NOTE(review): presumably normalizes/validates the spec -- the transformation is not
// visible in this header, confirm in the implementation.
static BSONObj fixSpec( const BSONObj& spec );
85
// Scoring helper that works with a Tools bundle and a term-frequency map passed by pointer.
// NOTE(review): presumably walks nested fields and accumulates into *term_freqs -- the
// implementation is not visible here, confirm.
void _scoreRecurse(const Tools& tools,
87
TermFrequencyMap* term_freqs ) const;
89
// Scoring helper for a single raw string, given a Tools bundle, a term-frequency map
// passed by pointer, and a weight.
// NOTE(review): presumably tokenizes raw and adds weight-scaled contributions to
// *term_freqs -- the implementation is not visible here, confirm.
void _scoreString( const Tools& tools,
90
const StringData& raw,
91
TermFrequencyMap* term_freqs,
92
double weight ) const;
94
// Backing storage for defaultLanguage().
string _defaultLanguage;
95
// Backing storage for languageOverrideField().
string _languageOverrideField;
98
// _weights stores a mapping between the fields and the value as a double
99
// basically, how much should an occurrence of (query term) in (field) be worth
102
// other fields to index
103
// Backing storage for numExtraBefore() / extraBefore(i).
std::vector<string> _extraBefore;
104
// Backing storage for numExtraAfter() / extraAfter(i).
std::vector<string> _extraAfter;