1
package org.apache.lucene.index;
3
import org.apache.lucene.util.StringHelper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
9
* Licensed under the Apache License, Version 2.0 (the "License");
10
* you may not use this file except in compliance with the License.
11
* You may obtain a copy of the License at
13
* http://www.apache.org/licenses/LICENSE-2.0
15
* Unless required by applicable law or agreed to in writing, software
16
* distributed under the License is distributed on an "AS IS" BASIS,
17
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
* See the License for the specific language governing permissions and
19
* limitations under the License.
25
/**
 * Transparent access to the vector space model,
 * either via TermFreqVector or by resolving it from the inverted index.
 * <p>
 * Resolving a term vector from a large index can be a time-consuming process.
 * <p>
 * Warning! This class is not thread safe!
 */
32
public class TermVectorAccessor {
34
public TermVectorAccessor() {
38
* Instance reused to save garbage collector some time
40
private TermVectorMapperDecorator decoratedMapper = new TermVectorMapperDecorator();
44
* Visits the TermVectorMapper and populates it with terms available for a given document,
45
* either via a vector created at index time or by resolving them from the inverted index.
47
* @param indexReader Index source
48
* @param documentNumber Source document to access
49
* @param fieldName Field to resolve
50
* @param mapper Mapper to be mapped with data
53
public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException {
55
fieldName = StringHelper.intern(fieldName);
57
decoratedMapper.decorated = mapper;
58
decoratedMapper.termVectorStored = false;
60
indexReader.getTermFreqVector(documentNumber, fieldName, decoratedMapper);
62
if (!decoratedMapper.termVectorStored) {
63
mapper.setDocumentNumber(documentNumber);
64
build(indexReader, fieldName, mapper, documentNumber);
68
/** Instance reused to save garbage collector some time */
69
private List<String> tokens;
71
/** Instance reused to save garbage collector some time */
72
private List<int[]> positions;
74
/** Instance reused to save garbage collector some time */
75
private List<Integer> frequencies;
79
* Populates the mapper with terms available for the given field in a document
80
* by resolving the inverted index.
83
* @param field interned field name
85
* @param documentNumber
88
private void build(IndexReader indexReader, String field, TermVectorMapper mapper, int documentNumber) throws IOException {
91
tokens = new ArrayList<String>(500);
92
positions = new ArrayList<int[]>(500);
93
frequencies = new ArrayList<Integer>(500);
100
TermEnum termEnum = indexReader.terms(new Term(field, ""));
101
if (termEnum.term() != null) {
102
while (termEnum.term().field() == field) {
103
TermPositions termPositions = indexReader.termPositions(termEnum.term());
104
if (termPositions.skipTo(documentNumber)) {
106
frequencies.add(Integer.valueOf(termPositions.freq()));
107
tokens.add(termEnum.term().text());
110
if (!mapper.isIgnoringPositions()) {
111
int[] positions = new int[termPositions.freq()];
112
for (int i = 0; i < positions.length; i++) {
113
positions[i] = termPositions.nextPosition();
115
this.positions.add(positions);
120
termPositions.close();
121
if (!termEnum.next()) {
125
mapper.setDocumentNumber(documentNumber);
126
mapper.setExpectations(field, tokens.size(), false, !mapper.isIgnoringPositions());
127
for (int i = 0; i < tokens.size(); i++) {
128
mapper.map(tokens.get(i), frequencies.get(i).intValue(), (TermVectorOffsetInfo[]) null, positions.get(i));
137
private static class TermVectorMapperDecorator extends TermVectorMapper {
139
private TermVectorMapper decorated;
142
public boolean isIgnoringPositions() {
143
return decorated.isIgnoringPositions();
147
public boolean isIgnoringOffsets() {
148
return decorated.isIgnoringOffsets();
151
private boolean termVectorStored = false;
154
public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
155
decorated.setExpectations(field, numTerms, storeOffsets, storePositions);
156
termVectorStored = true;
160
public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
161
decorated.map(term, frequency, offsets, positions);
165
public void setDocumentNumber(int documentNumber) {
166
decorated.setDocumentNumber(documentNumber);