1
// fts_index_format.cpp
4
* Copyright (C) 2012 10gen Inc.
6
* This program is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU Affero General Public License, version 3,
8
* as published by the Free Software Foundation.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU Affero General Public License for more details.
15
* You should have received a copy of the GNU Affero General Public License
16
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19
#include "mongo/pch.h"
21
#include "mongo/base/init.h"
22
#include "mongo/db/fts/fts_index_format.h"
23
#include "mongo/util/mongoutils/str.h"
34
// Initializer registered under the name FTSIndexFormat through the
// MONGO_INITIALIZER macro; it receives an InitializerContext*.
// NOTE(review): only part of the body is visible in this excerpt; nullElt and
// nullObj are declared elsewhere -- confirm their definitions before relying
// on this comment.
MONGO_INITIALIZER( FTSIndexFormat )( InitializerContext* context ) {
38
// Store the first element of nullObj in nullElt.
nullElt = nullObj.firstElement();
42
// Builds the text-index key objects for one document according to `spec`.
//
// For each scored term a key is assembled as: the "extra before" field
// values, then the term and its weight (via _appendIndexKey), then the
// "extra after" field values. Every element is appended under an empty
// field name.
//
// Only `spec` is visible in the signature in this excerpt; `obj`, `extraSize`
// and `guess` are used below but declared on lines that are not shown here.
//
// Raises (uassert):
//  - 16675 while collecting the prefix fields (its condition is not visible
//    in this excerpt);
//  - when the document yields more than 400000 distinct terms;
//  - when the accumulated BSON size of the generated keys exceeds
//    MaxKeyBSONSizeMB megabytes.
void FTSIndexFormat::getKeys( const FTSSpec& spec,
47
vector<BSONElement> extrasBefore;
48
vector<BSONElement> extrasAfter;
50
// compute the non FTS key elements
51
for ( unsigned i = 0; i < spec.numExtraBefore(); i++ ) {
52
BSONElement e = obj.getFieldDotted(spec.extraBefore(i));
55
uassert( 16675, "cannot have a multi-key as a prefix to a text index",
57
extrasBefore.push_back(e);
58
// running total of the extra elements' sizes
extraSize += e.size();
60
for ( unsigned i = 0; i < spec.numExtraAfter(); i++ ) {
61
BSONElement e = obj.getFieldDotted(spec.extraAfter(i));
64
extrasAfter.push_back(e);
65
extraSize += e.size();
69
// scoreDocument fills term_freqs; it is iterated below as term -> weight
TermFrequencyMap term_freqs;
70
spec.scoreDocument( obj, &term_freqs );
72
// create index keys from raw scores
76
mongoutils::str::stream() << "too many unique keys for a single document to"
77
// Report the actual limit (must match the condition on the next line) and
// the observed count separately; the count used to be printed as the "max".
<< " have a text index, max is 400000, found " << term_freqs.size() << " " << obj["_id"],
78
term_freqs.size() <= 400000 );
80
// total BSON size of the keys generated so far, checked against the cap below
long long keyBSONSize = 0;
81
const int MaxKeyBSONSizeMB = 4;
83
for ( TermFrequencyMap::const_iterator i = term_freqs.begin();
84
i != term_freqs.end();
87
const string& term = i->first;
88
double weight = i->second;
90
// guess the total size of the btree entry from the size of the (weight, term) tuple
92
5 /* bson overhead */ +
94
8 /* term overhead */ +
98
BSONObjBuilder b(guess); // builder constructed with the guessed size
99
for ( unsigned k = 0; k < extrasBefore.size(); k++ )
100
b.appendAs( extrasBefore[k], "" );
101
_appendIndexKey( b, weight, term );
102
for ( unsigned k = 0; k < extrasAfter.size(); k++ )
103
b.appendAs( extrasAfter[k], "" );
104
BSONObj res = b.obj();
106
// the size guess must never be smaller than the object actually built
verify( guess >= res.objsize() );
110
keyBSONSize += res.objsize();
113
mongoutils::str::stream()
114
<< "trying to index text where term list is too big, max is "
115
<< MaxKeyBSONSizeMB << "mb " << obj["_id"],
116
keyBSONSize <= ( MaxKeyBSONSizeMB * 1024 * 1024 ) );
121
// Builds a single index key: elements taken from indexPrefix (each re-added
// under an empty field name), followed by the term and weight appended by
// _appendIndexKey.
// NOTE(review): the `term` parameter and the builder `b` are declared on lines
// not visible in this excerpt, as is whatever controls how many prefix
// elements are consumed -- confirm against the full file.
BSONObj FTSIndexFormat::getIndexKey( double weight,
123
const BSONObj& indexPrefix ) {
126
BSONObjIterator i( indexPrefix );
128
// copy the next prefix element into the key with an empty field name
b.appendAs( i.next(), "" );
130
_appendIndexKey( b, weight, term );
134
// Appends the text-specific part of an index key to `b`: first the term
// (string), then the weight (double), both under an empty field name.
//  b      - builder receiving the two elements
//  weight - must satisfy 0 <= weight <= MAX_WEIGHT (enforced with verify)
//  term   - the indexed term
void FTSIndexFormat::_appendIndexKey( BSONObjBuilder& b, double weight, const string& term ) {
135
verify( weight >= 0 && weight <= MAX_WEIGHT ); // MAX_WEIGHT is defined outside this file excerpt (FTS header, per the original note)
136
b.append( "", term );
137
b.append( "", weight );