1
package org.apache.lucene.index;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.IOException;
22
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
23
import org.apache.lucene.document.Fieldable;
24
import org.apache.lucene.index.FieldInfo.IndexOptions;
25
import org.apache.lucene.util.RamUsageEstimator;
27
// TODO: break into separate freq and prox writers as
28
// codecs; make separate container (tii/tis/skip/*) that can
29
// be configured as any number of files 1..N
30
/**
 * Per-field consumer that buffers postings for a single field. Document codes
 * and frequencies are written through {@code termsHashPerField.writeVInt(0, ...)}
 * and position/payload data through {@code termsHashPerField.writeVInt(1, ...)} /
 * {@code writeBytes(1, ...)}. Instances compare by field name.
 */
final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implements Comparable<FreqProxTermsWriterPerField> {
32
// Per-thread writer handed to the constructor.
final FreqProxTermsWriterPerThread perThread;
33
// Terms hash for this field; supplies postingsArray and the writeVInt/writeBytes calls used by this class.
final TermsHashPerField termsHashPerField;
34
// Field metadata; its name is used by compareTo and its indexOptions decide what is buffered.
final FieldInfo fieldInfo;
35
// Same object as termsHashPerField.docState; read for the current docID.
final DocumentsWriter.DocState docState;
36
// Same object as termsHashPerField.fieldState; read for position and attributeSource,
// updated for maxTermFrequency and uniqueTermCount.
final FieldInvertState fieldState;
37
// Cached copy of fieldInfo.indexOptions.
IndexOptions indexOptions;
38
// Payload attribute obtained from fieldState.attributeSource, or null when none is in use.
PayloadAttribute payloadAttribute;
40
/**
 * Stores the collaborators and caches state taken from them.
 *
 * @param termsHashPerField terms hash for this field; its docState and fieldState are reused by this instance
 * @param perThread per-thread writer, stored in {@code perThread}
 * @param fieldInfo metadata of the field; its indexOptions are copied into {@code indexOptions}
 */
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) {
41
this.termsHashPerField = termsHashPerField;
42
this.perThread = perThread;
43
this.fieldInfo = fieldInfo;
44
// Share the per-document and per-field inversion state with the terms hash.
docState = termsHashPerField.docState;
45
fieldState = termsHashPerField.fieldState;
46
// Snapshot the field's index options at construction time.
indexOptions = fieldInfo.indexOptions;
50
// Stream count for this field; the result is selected by whether the field
// indexes positions (DOCS_AND_FREQS_AND_POSITIONS) or not.
// NOTE(review): the returned values are not shown here - presumably 1 without
// positions and 2 with, since writeProx writes to stream 1; confirm.
int getStreamCount() {
51
if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
63
// Intentionally empty: this consumer takes no action in this callback.
void skippingLongTerm() throws IOException {}
65
/**
 * Orders per-field writers by their field name.
 *
 * @param other writer to compare against
 * @return the result of comparing this field's name with the other field's name
 */
public int compareTo(FreqProxTermsWriterPerField other) {
66
return fieldInfo.name.compareTo(other.fieldInfo.name);
70
// Record, up front, whether our in-RAM format will be
71
// with or without term freqs:
72
indexOptions = fieldInfo.indexOptions;
73
payloadAttribute = null;
77
// Scans the first `count` entries of `fields` and tests each one's isIndexed() flag.
// NOTE(review): the statements that produce the boolean result are not shown
// here - presumably true as soon as one indexed field is found; confirm.
boolean start(Fieldable[] fields, int count) {
78
for(int i=0;i<count;i++)
79
if (fields[i].isIndexed())
85
// Refreshes payloadAttribute from the field's attribute source.
void start(Fieldable f) {
86
// Only fetch the attribute when the source actually carries one.
if (fieldState.attributeSource.hasAttribute(PayloadAttribute.class)) {
87
payloadAttribute = fieldState.attributeSource.getAttribute(PayloadAttribute.class);
89
// NOTE(review): presumably the "no payload attribute" branch - confirm against the full file.
payloadAttribute = null;
93
// Appends one position entry for termID to stream 1 and remembers the position.
// proxCode is the value to encode: newTerm/addTerm pass either the absolute
// position or the delta from the term's last position.
void writeProx(final int termID, int proxCode) {
94
final Payload payload;
95
// NOTE(review): the branch bodies around the payload lookup are only partly
// visible; presumably payload is null when there is no payload attribute - confirm.
if (payloadAttribute == null) {
98
payload = payloadAttribute.getPayload();
101
// Non-empty payload: shift the code left and set the low bit, then write the
// payload length followed by the payload bytes.
if (payload != null && payload.length > 0) {
102
termsHashPerField.writeVInt(1, (proxCode<<1)|1);
103
termsHashPerField.writeVInt(1, payload.length);
104
termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
107
// Shifted code with the low bit clear (no payload data follows this write).
termsHashPerField.writeVInt(1, proxCode<<1);
109
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
110
// Record the current position so a later occurrence can be written as a delta.
postings.lastPositions[termID] = fieldState.position;
115
// Initializes the postings slots for a term that has no buffered state yet.
void newTerm(final int termID) {
116
// First time we're seeing this term since the last flush
118
assert docState.testPoint("FreqProxTermsWriterPerField.newTerm start");
120
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
121
postings.lastDocIDs[termID] = docState.docID;
122
if (indexOptions == IndexOptions.DOCS_ONLY) {
123
// Docs-only: the pending doc code is the docID itself.
postings.lastDocCodes[termID] = docState.docID;
125
// With freqs: the docID is shifted left one bit; addTerm later ORs in the
// low bit when the doc's frequency turns out to be 1.
postings.lastDocCodes[termID] = docState.docID << 1;
126
postings.docFreqs[termID] = 1;
127
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
128
// First occurrence: the position is passed as-is, not as a delta.
writeProx(termID, fieldState.position);
131
// Field-level statistics: at least one occurrence, and one more unique term.
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
132
fieldState.uniqueTermCount++;
136
// Records another occurrence of a term that already has buffered state.
// When the term moves on to a new document, the pending code for the previous
// document is written to stream 0 and the per-term slots are reset for the
// current document.
void addTerm(final int termID) {
138
assert docState.testPoint("FreqProxTermsWriterPerField.addTerm start");
140
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
142
// docFreqs is only maintained when frequencies are indexed.
assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
144
if (indexOptions == IndexOptions.DOCS_ONLY) {
145
if (docState.docID != postings.lastDocIDs[termID]) {
146
assert docState.docID > postings.lastDocIDs[termID];
147
// Write the pending code for the previous doc, then store the new pending
// code as the docID delta.
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
148
postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
149
postings.lastDocIDs[termID] = docState.docID;
150
fieldState.uniqueTermCount++;
153
if (docState.docID != postings.lastDocIDs[termID]) {
154
assert docState.docID > postings.lastDocIDs[termID];
155
// Term not yet seen in the current doc but previously
156
// seen in other doc(s) since the last flush
158
// Now that we know doc freq for previous doc,
159
// write it & lastDocCode
160
// A frequency of exactly 1 is folded into the doc code by setting its low bit.
if (1 == postings.docFreqs[termID])
161
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
163
// Doc code with the low bit clear, followed by the frequency as a separate value.
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
164
termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
166
// Start counting occurrences for the current doc.
postings.docFreqs[termID] = 1;
167
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
168
// New pending code: docID delta shifted left one bit, leaving the low bit free.
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
169
postings.lastDocIDs[termID] = docState.docID;
170
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
171
// First occurrence in this doc: the position is passed as-is.
writeProx(termID, fieldState.position);
173
fieldState.uniqueTermCount++;
175
// Bump the term's frequency and fold it into the field's max term frequency.
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
176
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
177
// The position is passed as a delta from the term's last recorded position.
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
184
// Creates the postings array type used by this consumer, with `size` slots per parallel array.
ParallelPostingsArray createPostingsArray(int size) {
185
return new FreqProxPostingsArray(size);
188
/**
 * Postings array that adds four parallel {@code int} arrays, all indexed by
 * termID, holding the per-term state used by newTerm, addTerm and writeProx.
 */
static final class FreqProxPostingsArray extends ParallelPostingsArray {
189
// Allocates each of the four parallel arrays with `size` entries.
public FreqProxPostingsArray(int size) {
191
docFreqs = new int[size];
192
lastDocIDs = new int[size];
193
lastDocCodes = new int[size];
194
lastPositions = new int[size];
197
int docFreqs[]; // # times this term occurs in the current doc
198
int lastDocIDs[]; // Last docID where this term occurred
199
int lastDocCodes[]; // Code for prior doc
200
int lastPositions[]; // Last position where this term occurred
203
// Returns a fresh array of this same type with the requested size.
ParallelPostingsArray newInstance(int size) {
204
return new FreqProxPostingsArray(size);
208
// Copies the first numToCopy entries of every parallel array into toArray,
// which must also be a FreqProxPostingsArray.
void copyTo(ParallelPostingsArray toArray, int numToCopy) {
209
assert toArray instanceof FreqProxPostingsArray;
210
FreqProxPostingsArray to = (FreqProxPostingsArray) toArray;
212
// The superclass copy runs first; the four arrays declared here are copied below.
super.copyTo(toArray, numToCopy);
214
System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
215
System.arraycopy(lastDocIDs, 0, to.lastDocIDs, 0, numToCopy);
216
System.arraycopy(lastDocCodes, 0, to.lastDocCodes, 0, numToCopy);
217
System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
221
// Per-term byte cost: the base class constant plus four ints, one for each array declared here.
int bytesPerPosting() {
222
return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT;
226
// Intentionally empty: this class takes no action on abort.
public void abort() {}