package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.Explanation.IDFExplanation;

public class TestOmitTf extends LuceneTestCase {

  public static class SimpleSimilarity extends Similarity {
    @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
    @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
    @Override public float tf(float freq) { return freq; }
    @Override public float sloppyFreq(int distance) { return 2.0f; }
    @Override public float idf(int docFreq, int numDocs) { return 1.0f; }
    @Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
    @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
      return new IDFExplanation() {
        @Override
        public float getIdf() {
          return 1.0f;
        }
        @Override
        public String explain() {
          return "Inexplicable";
        }
      };
    }
  }

  // Tests whether the DocumentWriter correctly enables the
  // omitTermFreqAndPositions bit in the FieldInfo
  public void testOmitTermFreqAndPositions() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document d = new Document();

    // this field will have Tf
    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f1);

    // this field will NOT have Tf
    Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    f2.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f2);

    writer.addDocument(d);
    writer.forceMerge(1);

    // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger
    // keeps things constant
    d = new Document();

    // reverse the options of the two fields
    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f1);

    f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    d.add(f2);

    writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
    FieldInfos fi = reader.fieldInfos();
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);

    reader.close();
    ram.close();
  }

  // Tests whether merging of docs that have different
  // omitTermFreqAndPositions for the same field works
  public void testMixedMerge() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(
        ram,
        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
            setMaxBufferedDocs(3).
            setMergePolicy(newLogMergePolicy(2))
    );
    writer.setInfoStream(VERBOSE ? System.out : null);
    Document d = new Document();

    // this field will have Tf
    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f1);

    // this field will NOT have Tf
    Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    f2.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f2);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger
    // keeps things constant
    d = new Document();

    // reverse the options of the two fields
    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f1);

    f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    d.add(f2);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
    FieldInfos fi = reader.fieldInfos();
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);

    reader.close();
    ram.close();
  }

  // Make sure first adding docs that do not omitTermFreqAndPositions for
  // field X, then adding docs that do omitTermFreqAndPositions for that same
  // field, works
  public void testMixedRAM() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(
        ram,
        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
            setMaxBufferedDocs(10).
            setMergePolicy(newLogMergePolicy(2))
    );
    Document d = new Document();

    // this field will have Tf
    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f1);

    // this field will NOT have Tf
    Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f2);

    writer.addDocument(d);

    f2.setIndexOptions(IndexOptions.DOCS_ONLY);

    for(int i=0;i<20;i++)
      writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
    FieldInfos fi = reader.fieldInfos();
    assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);

    reader.close();
    ram.close();
  }

  private void assertNoPrx(Directory dir) throws Throwable {
    final String[] files = dir.listAll();
    for(int i=0;i<files.length;i++) {
      assertFalse(files[i].endsWith(".prx"));
      assertFalse(files[i].endsWith(".pos"));
    }
  }

  // Verifies no *.prx exists when all fields omit term freq:
  public void testNoPrxFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setUseCompoundFile(false);
    Document d = new Document();

    Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f1);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    writer.commit();

    assertNoPrx(ram);

    // now add some documents with positions, and check
    // there is no prox after full merge
    d = new Document();
    f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f1);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    assertNoPrx(ram);
    ram.close();
  }

  // Test scores with one field with Term Freqs and one without, otherwise with equal content
  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
            setMaxBufferedDocs(2).
            setSimilarity(new SimpleSimilarity()).
            setMergePolicy(newLogMergePolicy(2))
    );
    writer.setInfoStream(VERBOSE ? System.out : null);

    StringBuilder sb = new StringBuilder(265);
    String term = "term";
    for(int i = 0; i<30; i++){
      Document d = new Document();
      sb.append(term).append(" ");
      String content = sb.toString();
      Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
      noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
      d.add(noTf);

      Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
      d.add(tf);

      writer.addDocument(d);
      //System.out.println(d);
    }

    writer.forceMerge(1);
    // flush
    writer.close();

    /*
     * Verify the index
     */
    IndexReader reader = IndexReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new SimpleSimilarity());

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d);

    searcher.search(q1,
                    new CountingHitCollector() {
                      private Scorer scorer;
                      @Override
                      public final void setScorer(Scorer scorer) {
                        this.scorer = scorer;
                      }
                      @Override
                      public final void collect(int doc) throws IOException {
                        //System.out.println("Q1: Doc=" + doc + " score=" + score);
                        float score = scorer.score();
                        assertTrue(score==1.0f);
                        super.collect(doc);
                      }
                    });
    //System.out.println(CountingHitCollector.getCount());
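    // All q1 scores are exactly 1.0: "noTf" is DOCS_ONLY, so every hit's freq
    // is pinned to 1, and SimpleSimilarity returns tf(freq)=freq unscaled.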

    searcher.search(q2,
                    new CountingHitCollector() {
                      private Scorer scorer;
                      @Override
                      public final void setScorer(Scorer scorer) {
                        this.scorer = scorer;
                      }
                      @Override
                      public final void collect(int doc) throws IOException {
                        //System.out.println("Q2: Doc=" + doc + " score=" + score);
                        float score = scorer.score();
                        assertEquals(1.0f+doc, score, 0.00001f);
                        super.collect(doc);
                      }
                    });
    //System.out.println(CountingHitCollector.getCount());
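    // The "tf" field keeps freqs: doc i contains the term i+1 times, so the
    // score grows linearly with the doc id.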

    searcher.search(q3,
                    new CountingHitCollector() {
                      private Scorer scorer;
                      @Override
                      public final void setScorer(Scorer scorer) {
                        this.scorer = scorer;
                      }
                      @Override
                      public final void collect(int doc) throws IOException {
                        //System.out.println("Q3: Doc=" + doc + " score=" + score);
                        float score = scorer.score();
                        assertTrue(score==1.0f);
                        assertFalse(doc%2==0);
                        super.collect(doc);
                      }
                    });
    //System.out.println(CountingHitCollector.getCount());

    searcher.search(q4,
                    new CountingHitCollector() {
                      private Scorer scorer;
                      @Override
                      public final void setScorer(Scorer scorer) {
                        this.scorer = scorer;
                      }
                      @Override
                      public final void collect(int doc) throws IOException {
                        float score = scorer.score();
                        //System.out.println("Q4: Doc=" + doc + " score=" + score);
                        assertTrue(score==1.0f);
                        assertTrue(doc%2==0);
                        super.collect(doc);
                      }
                    });
    //System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.add(q1,Occur.MUST);
    bq.add(q4,Occur.MUST);

    searcher.search(bq,
                    new CountingHitCollector() {
                      @Override
                      public final void collect(int doc) throws IOException {
                        //System.out.println("BQ: Doc=" + doc + " score=" + score);
                        super.collect(doc);
                      }
                    });
    // q1 matches every doc; q4 matches only the 15 even docs whose "tf" field got " tf"
    assertTrue(15 == CountingHitCollector.getCount());

    searcher.close();
    reader.close();
    dir.close();
  }

  public static class CountingHitCollector extends Collector {
    static int count=0;
    static int sum=0;
    private int docBase = -1;
    CountingHitCollector(){count=0;sum=0;}
    @Override
    public void setScorer(Scorer scorer) throws IOException {}
    @Override
    public void collect(int doc) throws IOException {
      count++;
      sum += doc + docBase;  // use it to avoid any possibility of being merged away
    }

    public static int getCount() { return count; }
    public static int getSum() { return sum; }

    @Override
    public void setNextReader(IndexReader reader, int docBase) {
      this.docBase = docBase;
    }
    @Override
    public boolean acceptsDocsOutOfOrder() {
      return true;
    }
  }
}