1
package org.apache.lucene.index;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.Closeable;
21
import java.io.IOException;
23
import org.apache.lucene.store.Directory;
24
import org.apache.lucene.util.BytesRef;
25
import org.apache.lucene.util.DoubleBarrelLRUCache;
26
import org.apache.lucene.util.CloseableThreadLocal;
28
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
29
* Directory. Pairs are accessed either by Term or by ordinal position in the
32
final class TermInfosReader implements Closeable {
33
// Directory the constructor opens the terms (and, optionally, terms index) inputs from.
private final Directory directory;
34
// Segment name; combined with a file extension to build the terms file names.
private final String segment;
35
// Field metadata handed to every SegmentTermEnum this reader creates.
private final FieldInfos fieldInfos;
37
// Per-thread ThreadResources, created lazily by getThreadResources().
private final CloseableThreadLocal<ThreadResources> threadResources = new CloseableThreadLocal<ThreadResources>();
38
// Enumerator opened over the terms file; terms() hands out clones of it.
private final SegmentTermEnum origEnum;
39
// Copied from origEnum.size in the constructor; lookups short-circuit when it is 0.
private final long size;
41
// Terms index; only built when the constructor's indexDivisor is not -1.
private final TermInfosReaderIndex index;
42
// Cached index.length(), used to detect the last index block during sequential scans.
private final int indexLength;
44
// origEnum.indexInterval * indexDivisor, or -1 when the terms index is not loaded.
private final int totalIndexInterval;
46
// Size passed to the termsCache constructor.
private final static int DEFAULT_CACHE_SIZE = 1024;
48
// Just adds term's ord to TermInfo. Instances are what termsCache stores;
// callers in get() pass the enumerator's position as the ord.
49
private final static class TermInfoAndOrd extends TermInfo {
51
// Stores termOrd on the instance. NOTE(review): how ti is consumed
// (presumably via the superclass constructor) is not visible in this excerpt.
public TermInfoAndOrd(TermInfo ti, long termOrd) {
54
this.termOrd = termOrd;
58
// Cache key for termsCache: wraps a Term so it can be used as a
// DoubleBarrelLRUCache.CloneableKey.
private static class CloneableTerm extends DoubleBarrelLRUCache.CloneableKey {
59
// The wrapped term; always a private copy made by the constructor.
private final Term term;
61
// Copies t (field and text) into a new Term rather than keeping the caller's instance.
public CloneableTerm(Term t) {
62
this.term = new Term(t.field(), t.text());
66
// Returns a new key; the constructor copies the wrapped term again.
public Object clone() {
67
return new CloneableTerm(term);
71
// Equality is delegated to the wrapped Term.
// NOTE(review): the cast is unchecked, so passing anything other than a
// CloneableTerm throws ClassCastException (and null throws
// NullPointerException) instead of returning false.
public boolean equals(Object _other) {
72
CloneableTerm other = (CloneableTerm) _other;
73
return term.equals(other.term);
77
// Hash is delegated to the wrapped Term, consistent with equals above.
public int hashCode() {
78
return term.hashCode();
82
// Cache of term lookups (term -> TermInfo plus ordinal), sized by
// DEFAULT_CACHE_SIZE; read and populated by get().
private final DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd> termsCache = new DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd>(DEFAULT_CACHE_SIZE);
85
* Per-thread resources managed by ThreadLocal
87
// Holder for the state kept separately for each thread via threadResources.
private static final class ThreadResources {
88
// This thread's enumerator; initialised from terms() in getThreadResources()
// and then seeked/scanned by get() and getPosition().
SegmentTermEnum termEnum;
91
/**
 * Opens the terms file of a segment and, unless disabled, loads its terms index.
 *
 * @param dir directory the segment files are read from
 * @param seg segment name
 * @param fis field infos passed on to the term enumerators
 * @param readBufferSize buffer size used when opening the inputs
 * @param indexDivisor -1 to skip loading the terms index, otherwise a value
 *        greater than 0 that is multiplied into the index interval
 * @throws IllegalArgumentException if indexDivisor is neither -1 nor at least 1
 * @throws CorruptIndexException declared by this constructor; the raising code is not visible here
 * @throws IOException declared by this constructor; the raising code is not visible here
 */
TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
92
throws CorruptIndexException, IOException {
93
// NOTE(review): presumably flipped to true once construction finishes and
// checked during cleanup; the code reading it is not visible in this excerpt.
boolean success = false;
95
if (indexDivisor < 1 && indexDivisor != -1) {
96
throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
104
// Main enumerator over the terms file (last constructor argument false,
// as opposed to true for the index enumerator below).
origEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION),
105
readBufferSize), fieldInfos, false);
106
size = origEnum.size;
109
if (indexDivisor != -1) {
111
// Effective spacing between indexed terms after applying the divisor.
totalIndexInterval = origEnum.indexInterval * indexDivisor;
112
final String indexFileName = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION);
113
final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(indexFileName,
114
readBufferSize), fieldInfos, true);
116
// The index is built from the index enumerator plus the index file's length.
index = new TermInfosReaderIndex(indexEnum, indexDivisor, dir.fileLength(indexFileName), totalIndexInterval);
117
indexLength = index.length();
122
// Do not load terms index:
123
totalIndexInterval = -1;
129
// With lock-less commits, it's entirely possible (and
130
// fine) to hit a FileNotFound exception above. In
131
// this case, we want to explicitly close any subset
132
// of things that were opened so that we don't have to
133
// wait for a GC to do so.
140
/** Returns the skipInterval recorded on the original term enumerator. */
public int getSkipInterval() {
141
return origEnum.skipInterval;
144
/** Returns the maxSkipLevels recorded on the original term enumerator. */
public int getMaxSkipLevels() {
145
return origEnum.maxSkipLevels;
148
/**
 * Releases what this reader holds, including the per-thread resources.
 *
 * @throws IOException declared by this method; the raising call is not visible here
 */
public final void close() throws IOException {
149
// NOTE(review): the statement guarded by this null check is not visible in
// this excerpt; presumably it closes origEnum. Confirm against the full file.
if (origEnum != null)
151
threadResources.close();
154
/** Returns the number of term/value pairs in the set. */
159
// Fetches the calling thread's ThreadResources, creating it on first use
// with a fresh enumerator obtained from terms().
private ThreadResources getThreadResources() {
160
ThreadResources resources = threadResources.get();
161
if (resources == null) {
162
resources = new ThreadResources();
163
resources.termEnum = terms();
164
// Register the new instance so later calls on this thread reuse it.
threadResources.set(resources);
169
/**
 * Returns the TermInfo for a Term in the set, or null.
 * Converts the term's text to a BytesRef and delegates to the private
 * overload with mustSeekEnum set to false.
 */
170
TermInfo get(Term term) throws IOException {
171
BytesRef termBytesRef = new BytesRef(term.text);
172
return get(term, false, termBytesRef);
175
/**
 * Returns the TermInfo for a Term in the set, or null.
 *
 * Consults termsCache first, then tries to scan forward with the calling
 * thread's enumerator, and otherwise seeks the enumerator through the terms
 * index before scanning.
 *
 * @param term the term to look up
 * @param mustSeekEnum when false, a cached entry is acted on before the
 *        enumerator is touched; terms(Term) passes true and afterwards clones
 *        the thread's enumerator
 * @param termBytesRef the term's text as a BytesRef, passed to the index
 *        comparison and lookup calls
 */
176
private TermInfo get(Term term, boolean mustSeekEnum, BytesRef termBytesRef) throws IOException {
177
if (size == 0) return null;
181
// The key wraps a private copy of the term (see CloneableTerm).
final CloneableTerm cacheKey = new CloneableTerm(term);
183
TermInfoAndOrd tiOrd = termsCache.get(cacheKey);
184
ThreadResources resources = getThreadResources();
186
if (!mustSeekEnum && tiOrd != null) {
190
// optimize sequential access: first try scanning cached enum w/o seeking
191
SegmentTermEnum enumerator = resources.termEnum;
192
if (enumerator.term() != null // term is at or past current
193
&& ((enumerator.prev() != null && term.compareTo(enumerator.prev())> 0)
194
|| term.compareTo(enumerator.term()) >= 0)) {
195
// Index entry that follows the block the enumerator is currently in.
int enumOffset = (int)(enumerator.position/totalIndexInterval)+1;
196
if (indexLength == enumOffset // but before end of block
197
|| index.compareTo(term,termBytesRef,enumOffset) < 0) {
202
int numScans = enumerator.scanTo(term);
203
if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
204
ti = enumerator.termInfo();
206
// we only want to put this TermInfo into the cache if
207
// scanEnum skipped more than one dictionary entry.
208
// This prevents RangeQueries or WildcardQueries from
209
// wiping out the cache when they iterate over a large number of terms.
212
termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
214
// Sanity checks against the cached entry.
// NOTE(review): this compares (int) position against the long termOrd, while
// the equivalent assert at the end of the method compares without the cast;
// the two disagree once position exceeds the int range.
assert sameTermInfo(ti, tiOrd, enumerator);
215
assert (int) enumerator.position == tiOrd.termOrd;
226
// random-access: must seek
229
// The cached ordinal gives the index block directly.
indexPos = (int) (tiOrd.termOrd / totalIndexInterval);
231
// Must do binary search:
232
indexPos = index.getIndexOffset(term,termBytesRef);
235
// Position the enumerator at the chosen index entry, then scan to the term.
index.seekEnum(enumerator, indexPos);
236
enumerator.scanTo(term);
238
if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
239
ti = enumerator.termInfo();
241
termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
243
// Sanity checks against the cached entry.
assert sameTermInfo(ti, tiOrd, enumerator);
244
assert enumerator.position == tiOrd.termOrd;
252
// called only from asserts
// Compares two TermInfo instances field by field: docFreq, freqPointer,
// proxPointer and, conditionally, skipOffset. The enumerator supplies the
// skipInterval for the skipOffset condition.
253
private final boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) {
254
if (ti1.docFreq != ti2.docFreq) {
257
if (ti1.freqPointer != ti2.freqPointer) {
260
if (ti1.proxPointer != ti2.proxPointer) {
263
// skipOffset is only valid when docFreq >= skipInterval:
264
if (ti1.docFreq >= enumerator.skipInterval &&
265
ti1.skipOffset != ti2.skipOffset) {
271
// Guard for operations that need the terms index.
// NOTE(review): the condition around the throw is not visible in this
// excerpt; presumably it fires only when the index was not loaded.
private void ensureIndexIsRead() {
273
throw new IllegalStateException("terms index was not loaded when this reader was created");
277
/**
 * Returns the position of a Term in the set or -1.
 * Uses the terms index to seek the calling thread's enumerator, then steps
 * forward until the enumerator reaches or passes the term.
 */
278
final long getPosition(Term term) throws IOException {
279
if (size == 0) return -1;
282
BytesRef termBytesRef = new BytesRef(term.text);
283
int indexOffset = index.getIndexOffset(term,termBytesRef);
285
SegmentTermEnum enumerator = getThreadResources().termEnum;
286
index.seekEnum(enumerator, indexOffset);
288
// Advance while the enumerator is still before the requested term and has more entries.
while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
290
// Exact match: the enumerator's position is the term's ordinal.
if (term.compareTo(enumerator.term()) == 0)
291
return enumerator.position;
296
/**
 * Returns an enumeration of all the Terms and TermInfos in the set.
 * Each call returns a new clone of the original enumerator.
 */
297
public SegmentTermEnum terms() {
298
return (SegmentTermEnum)origEnum.clone();
301
/**
 * Returns an enumeration of terms starting at or after the named term.
 * Runs a lookup with mustSeekEnum set to true, then returns a clone of the
 * calling thread's enumerator.
 */
302
public SegmentTermEnum terms(Term term) throws IOException {
303
BytesRef termBytesRef = new BytesRef(term.text);
304
// Return value is ignored; the call is made for its effect on the thread's enumerator.
get(term, true, termBytesRef);
305
return (SegmentTermEnum)getThreadResources().termEnum.clone();