1
/*------------------------------------------------------------------------------
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
4
* Distributable under the terms of either the Apache License (Version 2.0) or
5
* the GNU Lesser General Public License, as specified in the COPYING file.
6
------------------------------------------------------------------------------*/
7
#ifndef _lucene_index_TermInfosReader_
8
#define _lucene_index_TermInfosReader_
12
#include "_SegmentTermEnum.h"
13
CL_CLASS_DEF(store,Directory)
14
//CL_CLASS_DEF(store,IndexInput)
15
#include "CLucene/util/_ThreadLocal.h"
16
//#include "FieldInfos.h"
17
//#include "TermInfo.h"
18
//#include "TermInfosWriter.h"
21
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
22
* Directory. Pairs are accessed either by Term or by ordinal position in the set.
25
* PORT STATUS: 365707 (jlucene 1.9) -- started port to JLucene 2.3.2
27
class TermInfosReader :LUCENE_BASE{
29
// Directory handle used by this reader.
// NOTE(review): presumably the `dir` constructor argument, i.e. where the
// segment's term files live -- confirm in the .cpp; ownership not visible here.
CL_NS(store)::Directory* directory;
31
// Field metadata pointer.
// NOTE(review): presumably the `fis` constructor argument -- confirm in the
// .cpp; ownership is not visible in this header.
FieldInfos* fieldInfos;
33
// Thread-local holder of SegmentTermEnum pointers; the second template
// argument selects Deletor::Object<SegmentTermEnum> as the deletor type.
// NOTE(review): presumably caches one enumerator per calling thread -- confirm.
CL_NS(util)::ThreadLocal<SegmentTermEnum*,
34
CL_NS(util)::Deletor::Object<SegmentTermEnum> > enumerators;
36
// NOTE(review): presumably returns the calling thread's enumerator from
// `enumerators`, creating it on first use -- confirm against the implementation.
SegmentTermEnum* getEnum();
37
// NOTE(review): presumably the enumerator over the main term file (.tis) -- confirm.
SegmentTermEnum* origEnum;
38
// NOTE(review): presumably the enumerator over the term index file (.tii) -- confirm.
SegmentTermEnum* indexEnum;
41
// Term index entries.
// NOTE(review): presumably the subsampled terms that setIndexDivisor()
// describes as being loaded into memory -- confirm in the .cpp.
Term* indexTerms; //note: this is a list of objects, not arrays!
42
// NOTE(review): presumably the number of entries in indexTerms -- confirm.
int32_t indexTermsLength;
44
// NOTE(review): presumably one file offset per indexTerms entry -- confirm.
int64_t* indexPointers;
47
// NOTE(review): presumably the index divisor multiplied by the term index
// interval, i.e. the spacing between terms kept in memory -- confirm.
int32_t totalIndexInterval;
49
// Macro invocation naming THIS_LOCK.
// NOTE(review): presumably declares the mutex used by the synchronized
// members of this reader -- confirm the macro definition and which methods
// actually take the lock.
DEFINE_MUTEX(THIS_LOCK)
54
* Reads the TermInfos file (.tis) and eventually the Term Info Index file (.tii)
56
// Parameters: `dir` is the directory to read from, `segment` is the segment
// name as a C string, `fis` is the field infos pointer. `readBufferSize`
// defaults to CL_NS(store)::BufferedIndexInput::BUFFER_SIZE when omitted.
// NOTE(review): whether the reader takes ownership of `dir` or `fis` is not
// visible in this header -- confirm in the .cpp.
TermInfosReader(CL_NS(store)::Directory* dir, const char* segment, FieldInfos* fis,
57
const int32_t readBufferSize = CL_NS(store)::BufferedIndexInput::BUFFER_SIZE);
60
// Accessor for the skip interval; does not modify the reader (const).
// NOTE(review): presumably the value recorded in the term file and exposed
// by the underlying SegmentTermEnum -- confirm in the .cpp.
int32_t getSkipInterval() const;
61
// Accessor for the maximum number of skip levels; does not modify the reader (const).
// NOTE(review): the source of this value is not visible here -- confirm in the .cpp.
int32_t getMaxSkipLevels() const;
64
* <p>Sets the indexDivisor, which subsamples the number
65
* of indexed terms loaded into memory. This has a
66
* similar effect as {@link
67
* IndexWriter#setTermIndexInterval} except that setting
68
* must be done at indexing time while this setting can be
69
* set per reader. When set to N, then one in every
70
* N*termIndexInterval terms in the index is loaded into
71
* memory. By setting this to a value > 1 you can reduce
72
* memory usage, at the expense of higher latency when
73
* loading a TermInfo. The default value is 1.</p>
75
* <b>NOTE:</b> you must call this before the term
76
* index is loaded. If the index is already loaded,
77
* an IllegalStateException is thrown.
79
* @throws IllegalStateException if the term index has
80
* already been loaded into memory.
82
void setIndexDivisor(const int32_t _indexDivisor);
84
/** Returns the indexDivisor.
85
* @see #setIndexDivisor
87
int32_t getIndexDivisor() const;
89
/** Close the enumeration of TermInfos */
92
/** Returns the number of term/value pairs in the set. */
96
* Returns an enumeration of terms starting at or after the named term.
97
* If no term is specified, an enumeration of all the Terms
98
* and TermInfos in the set is returned.
100
SegmentTermEnum* terms(const Term* term=NULL);
102
/** Returns the TermInfo for a Term in the set, or null. */
103
TermInfo* get(const Term* term);
105
/** Reads the term info index file (the .tii file). */
106
void ensureIndexIsRead();
108
/** Returns the offset of the greatest index entry which is less than or equal to term.*/
109
int32_t getIndexOffset(const Term* term);
111
/** Reposition the current Term and TermInfo to indexOffset */
112
void seekEnum(const int32_t indexOffset);
114
/** Scans the Enumeration of terms for term and returns the corresponding TermInfo instance if found.
115
* The search is started from the current term.
117
TermInfo* scanEnum(const Term* term);
119
/** Scans the enumeration to the requested position and returns the Term located at that position */
120
Term* scanEnum(const int32_t position);
122
/** Returns the position of a Term in the set or -1. */
123
int64_t getPosition(const Term* term);
125
/** Returns the nth term in the set. synchronized */
126
Term* get(const int32_t position);