/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.highlight;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
66
public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized
69
public static Logger log = LoggerFactory.getLogger(DefaultSolrHighlighter.class);
71
private SolrCore solrCore;
73
public DefaultSolrHighlighter() {
76
public DefaultSolrHighlighter(SolrCore solrCore) {
77
this.solrCore = solrCore;
80
public void init(PluginInfo info) {
84
fragListBuilders.clear();
85
fragmentsBuilders.clear();
86
boundaryScanners.clear();
88
// Load the fragmenters
89
SolrFragmenter frag = solrCore.initPlugins(info.getChildren("fragmenter") , fragmenters,SolrFragmenter.class,null);
90
if (frag == null) frag = new GapFragmenter();
91
fragmenters.put("", frag);
92
fragmenters.put(null, frag);
94
// Load the formatters
95
SolrFormatter fmt = solrCore.initPlugins(info.getChildren("formatter"), formatters,SolrFormatter.class,null);
96
if (fmt == null) fmt = new HtmlFormatter();
97
formatters.put("", fmt);
98
formatters.put(null, fmt);
100
// Load the formatters
101
SolrEncoder enc = solrCore.initPlugins(info.getChildren("encoder"), encoders,SolrEncoder.class,null);
102
if (enc == null) enc = new DefaultEncoder();
103
encoders.put("", enc);
104
encoders.put(null, enc);
106
// Load the FragListBuilders
107
SolrFragListBuilder fragListBuilder = solrCore.initPlugins(info.getChildren("fragListBuilder"),
108
fragListBuilders, SolrFragListBuilder.class, null );
109
if( fragListBuilder == null ) fragListBuilder = new SimpleFragListBuilder();
110
fragListBuilders.put( "", fragListBuilder );
111
fragListBuilders.put( null, fragListBuilder );
113
// Load the FragmentsBuilders
114
SolrFragmentsBuilder fragsBuilder = solrCore.initPlugins(info.getChildren("fragmentsBuilder"),
115
fragmentsBuilders, SolrFragmentsBuilder.class, null);
116
if( fragsBuilder == null ) fragsBuilder = new ScoreOrderFragmentsBuilder();
117
fragmentsBuilders.put( "", fragsBuilder );
118
fragmentsBuilders.put( null, fragsBuilder );
120
// Load the BoundaryScanners
121
SolrBoundaryScanner boundaryScanner = solrCore.initPlugins(info.getChildren("boundaryScanner"),
122
boundaryScanners, SolrBoundaryScanner.class, null);
123
if(boundaryScanner == null) boundaryScanner = new SimpleBoundaryScanner();
124
boundaryScanners.put("", boundaryScanner);
125
boundaryScanners.put(null, boundaryScanner);
129
//just for back-compat with the deprecated method
130
private boolean initialized = false;
133
public void initalize( SolrConfig config) {
134
if (initialized) return;
135
SolrFragmenter frag = new GapFragmenter();
136
fragmenters.put("", frag);
137
fragmenters.put(null, frag);
139
SolrFormatter fmt = new HtmlFormatter();
140
formatters.put("", fmt);
141
formatters.put(null, fmt);
143
SolrEncoder enc = new DefaultEncoder();
144
encoders.put("", enc);
145
encoders.put(null, enc);
147
SolrFragListBuilder fragListBuilder = new SimpleFragListBuilder();
148
fragListBuilders.put( "", fragListBuilder );
149
fragListBuilders.put( null, fragListBuilder );
151
SolrFragmentsBuilder fragsBuilder = new ScoreOrderFragmentsBuilder();
152
fragmentsBuilders.put( "", fragsBuilder );
153
fragmentsBuilders.put( null, fragsBuilder );
155
SolrBoundaryScanner boundaryScanner = new SimpleBoundaryScanner();
156
boundaryScanners.put("", boundaryScanner);
157
boundaryScanners.put(null, boundaryScanner);
161
* Return a phrase {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
162
* @param query The current Query
163
* @param fieldName The name of the field
164
* @param request The current SolrQueryRequest
165
* @param tokenStream document text CachingTokenStream
166
* @throws IOException
168
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request, CachingTokenFilter tokenStream) throws IOException {
169
SolrParams params = request.getParams();
170
Highlighter highlighter = null;
172
highlighter = new Highlighter(
173
getFormatter(fieldName, params),
174
getEncoder(fieldName, params),
175
getSpanQueryScorer(query, fieldName, tokenStream, request));
177
highlighter.setTextFragmenter(getFragmenter(fieldName, params));
183
* Return a {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
184
* @param query The current Query
185
* @param fieldName The name of the field
186
* @param request The current SolrQueryRequest
188
protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
189
SolrParams params = request.getParams();
190
Highlighter highlighter = new Highlighter(
191
getFormatter(fieldName, params),
192
getEncoder(fieldName, params),
193
getQueryScorer(query, fieldName, request));
194
highlighter.setTextFragmenter(getFragmenter(fieldName, params));
199
* Return a {@link org.apache.lucene.search.highlight.QueryScorer} suitable for this Query and field.
200
* @param query The current query
201
* @param tokenStream document text CachingTokenStream
202
* @param fieldName The name of the field
203
* @param request The SolrQueryRequest
204
* @throws IOException
206
private QueryScorer getSpanQueryScorer(Query query, String fieldName, TokenStream tokenStream, SolrQueryRequest request) throws IOException {
207
boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
208
Boolean highlightMultiTerm = request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true);
209
if(highlightMultiTerm == null) {
210
highlightMultiTerm = false;
214
scorer = new QueryScorer(query, fieldName);
217
scorer = new QueryScorer(query, null);
219
scorer.setExpandMultiTermQuery(highlightMultiTerm);
224
* Return a {@link org.apache.lucene.search.highlight.Scorer} suitable for this Query and field.
225
* @param query The current query
226
* @param fieldName The name of the field
227
* @param request The SolrQueryRequest
229
private Scorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) {
230
boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
232
return new QueryTermScorer(query, request.getSearcher().getReader(), fieldName);
235
return new QueryTermScorer(query);
240
* Return the max number of snippets for this field. If this has not
241
* been configured for this field, fall back to the configured default
242
* or the solr default.
243
* @param fieldName The name of the field
244
* @param params The params controlling Highlighting
246
protected int getMaxSnippets(String fieldName, SolrParams params) {
247
return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
251
* Return whether adjacent fragments should be merged.
252
* @param fieldName The name of the field
253
* @param params The params controlling Highlighting
255
protected boolean isMergeContiguousFragments(String fieldName, SolrParams params){
256
return params.getFieldBool(fieldName, HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
260
* Return a {@link org.apache.lucene.search.highlight.Formatter} appropriate for this field. If a formatter
261
* has not been configured for this field, fall back to the configured
262
* default or the solr default ({@link org.apache.lucene.search.highlight.SimpleHTMLFormatter}).
264
* @param fieldName The name of the field
265
* @param params The params controlling Highlighting
266
* @return An appropriate {@link org.apache.lucene.search.highlight.Formatter}.
268
protected Formatter getFormatter(String fieldName, SolrParams params )
270
String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER );
271
SolrFormatter formatter = formatters.get( str );
272
if( formatter == null ) {
273
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown formatter: "+str );
275
return formatter.getFormatter( fieldName, params );
279
* Return an {@link org.apache.lucene.search.highlight.Encoder} appropriate for this field. If an encoder
280
* has not been configured for this field, fall back to the configured
281
* default or the solr default ({@link org.apache.lucene.search.highlight.DefaultEncoder}).
283
* @param fieldName The name of the field
284
* @param params The params controlling Highlighting
285
* @return An appropriate {@link org.apache.lucene.search.highlight.Encoder}.
287
protected Encoder getEncoder(String fieldName, SolrParams params){
288
String str = params.getFieldParam( fieldName, HighlightParams.ENCODER );
289
SolrEncoder encoder = encoders.get( str );
290
if( encoder == null ) {
291
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown encoder: "+str );
293
return encoder.getEncoder( fieldName, params );
297
* Return a {@link org.apache.lucene.search.highlight.Fragmenter} appropriate for this field. If a fragmenter
298
* has not been configured for this field, fall back to the configured
299
* default or the solr default ({@link GapFragmenter}).
301
* @param fieldName The name of the field
302
* @param params The params controlling Highlighting
303
* @return An appropriate {@link org.apache.lucene.search.highlight.Fragmenter}.
305
protected Fragmenter getFragmenter(String fieldName, SolrParams params)
307
String fmt = params.getFieldParam( fieldName, HighlightParams.FRAGMENTER );
308
SolrFragmenter frag = fragmenters.get( fmt );
310
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmenter: "+fmt );
312
return frag.getFragmenter( fieldName, params );
315
protected FragListBuilder getFragListBuilder( String fieldName, SolrParams params ){
316
String flb = params.getFieldParam( fieldName, HighlightParams.FRAG_LIST_BUILDER );
317
SolrFragListBuilder solrFlb = fragListBuilders.get( flb );
318
if( solrFlb == null ){
319
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragListBuilder: " + flb );
321
return solrFlb.getFragListBuilder( params );
324
protected FragmentsBuilder getFragmentsBuilder( String fieldName, SolrParams params ){
325
BoundaryScanner bs = getBoundaryScanner(fieldName, params);
326
return getSolrFragmentsBuilder( fieldName, params ).getFragmentsBuilder( params, bs );
329
private SolrFragmentsBuilder getSolrFragmentsBuilder( String fieldName, SolrParams params ){
330
String fb = params.getFieldParam( fieldName, HighlightParams.FRAGMENTS_BUILDER );
331
SolrFragmentsBuilder solrFb = fragmentsBuilders.get( fb );
332
if( solrFb == null ){
333
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmentsBuilder: " + fb );
338
private BoundaryScanner getBoundaryScanner(String fieldName, SolrParams params){
339
String bs = params.getFieldParam(fieldName, HighlightParams.BOUNDARY_SCANNER);
340
SolrBoundaryScanner solrBs = boundaryScanners.get(bs);
342
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown boundaryScanner: " + bs);
344
return solrBs.getBoundaryScanner(fieldName, params);
348
* Generates a list of Highlighted query fragments for each item in a list
349
* of documents, or returns null if highlighting is disabled.
351
* @param docs query results
352
* @param query the query
353
* @param req the current request
354
* @param defaultFields default list of fields to summarize
356
* @return NamedList containing a NamedList for each document, which in
357
* turns contains sets (field, summary) pairs.
360
@SuppressWarnings("unchecked")
361
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
362
SolrParams params = req.getParams();
363
if (!isHighlightingEnabled(params))
366
SolrIndexSearcher searcher = req.getSearcher();
367
IndexSchema schema = searcher.getSchema();
368
NamedList fragments = new SimpleOrderedMap();
369
String[] fieldNames = getHighlightFields(query, req, defaultFields);
370
Set<String> fset = new HashSet<String>();
373
// pre-fetch documents using the Searcher's doc cache
374
for(String f : fieldNames) { fset.add(f); }
375
// fetch unique key if one exists.
376
SchemaField keyField = schema.getUniqueKeyField();
378
fset.add(keyField.getName());
381
// get FastVectorHighlighter instance out of the processing loop
382
FastVectorHighlighter fvh = new FastVectorHighlighter(
383
// FVH cannot process hl.usePhraseHighlighter parameter per-field basis
384
params.getBool( HighlightParams.USE_PHRASE_HIGHLIGHTER, true ),
385
// FVH cannot process hl.requireFieldMatch parameter per-field basis
386
params.getBool( HighlightParams.FIELD_MATCH, false ) );
387
fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
388
FieldQuery fieldQuery = fvh.getFieldQuery( query, searcher.getIndexReader() );
390
// Highlight each document
391
DocIterator iterator = docs.iterator();
392
for (int i = 0; i < docs.size(); i++) {
393
int docId = iterator.nextDoc();
394
Document doc = searcher.doc(docId, fset);
395
NamedList docSummaries = new SimpleOrderedMap();
396
for (String fieldName : fieldNames) {
397
fieldName = fieldName.trim();
398
if( useFastVectorHighlighter( params, schema, fieldName ) )
399
doHighlightingByFastVectorHighlighter( fvh, fieldQuery, req, docSummaries, docId, doc, fieldName );
401
doHighlightingByHighlighter( query, req, docSummaries, docId, doc, fieldName );
403
String printId = schema.printableUniqueKey(doc);
404
fragments.add(printId == null ? null : printId, docSummaries);
410
* If fieldName is undefined, this method returns false, then
411
* doHighlightingByHighlighter() will do nothing for the field.
413
private boolean useFastVectorHighlighter( SolrParams params, IndexSchema schema, String fieldName ){
414
SchemaField schemaField = schema.getFieldOrNull( fieldName );
415
return schemaField != null &&
416
schemaField.storeTermPositions() &&
417
schemaField.storeTermOffsets() &&
418
params.getFieldBool( fieldName, HighlightParams.USE_FVH, false );
421
private void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries,
422
int docId, Document doc, String fieldName ) throws IOException {
423
SolrParams params = req.getParams();
424
String[] docTexts = doc.getValues(fieldName);
425
// according to Document javadoc, doc.getValues() never returns null. check empty instead of null
426
if (docTexts.length == 0) return;
428
SolrIndexSearcher searcher = req.getSearcher();
429
IndexSchema schema = searcher.getSchema();
430
TokenStream tstream = null;
431
int numFragments = getMaxSnippets(fieldName, params);
432
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
434
String[] summaries = null;
435
List<TextFragment> frags = new ArrayList<TextFragment>();
437
TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
439
TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
440
if (tvStream != null) {
441
tots = new TermOffsetsTokenStream(tvStream);
444
catch (IllegalArgumentException e) {
445
// No problem. But we can't use TermOffsets optimization.
448
for (int j = 0; j < docTexts.length; j++) {
450
// if we're using TermOffsets optimization, then get the next
451
// field value's TokenStream (i.e. get field j's TokenStream) from tots:
452
tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
454
// fall back to analyzer
455
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
458
int maxCharsToAnalyze = params.getFieldInt(fieldName,
459
HighlightParams.MAX_CHARS,
460
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
462
Highlighter highlighter;
463
if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
464
// TODO: this is not always necessary - eventually we would like to avoid this wrap
465
// when it is not needed.
466
if (maxCharsToAnalyze < 0) {
467
tstream = new CachingTokenFilter(tstream);
469
tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
473
highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);
475
// after highlighter initialization, reset tstream since construction of highlighter already used it
480
highlighter = getHighlighter(query, fieldName, req);
483
if (maxCharsToAnalyze < 0) {
484
highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
486
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
490
TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments);
491
for (int k = 0; k < bestTextFragments.length; k++) {
492
if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
493
frags.add(bestTextFragments[k]);
496
} catch (InvalidTokenOffsetsException e) {
497
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
500
// sort such that the fragments with the highest score come first
501
Collections.sort(frags, new Comparator<TextFragment>() {
502
public int compare(TextFragment arg0, TextFragment arg1) {
503
return Math.round(arg1.getScore() - arg0.getScore());
507
// convert fragments back into text
508
// TODO: we can include score and position information in output as snippet attributes
509
if (frags.size() > 0) {
510
ArrayList<String> fragTexts = new ArrayList<String>();
511
for (TextFragment fragment: frags) {
512
if ((fragment != null) && (fragment.getScore() > 0)) {
513
fragTexts.add(fragment.toString());
515
if (fragTexts.size() >= numFragments) break;
517
summaries = fragTexts.toArray(new String[0]);
518
if (summaries.length > 0)
519
docSummaries.add(fieldName, summaries);
521
// no summeries made, copy text from alternate field
522
if (summaries == null || summaries.length == 0) {
523
alternateField( docSummaries, params, doc, fieldName );
527
private void doHighlightingByFastVectorHighlighter( FastVectorHighlighter highlighter, FieldQuery fieldQuery,
528
SolrQueryRequest req, NamedList docSummaries, int docId, Document doc,
529
String fieldName ) throws IOException {
530
SolrParams params = req.getParams();
531
SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder( fieldName, params );
532
String[] snippets = highlighter.getBestFragments( fieldQuery, req.getSearcher().getReader(), docId, fieldName,
533
params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ),
534
params.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ),
535
getFragListBuilder( fieldName, params ),
536
getFragmentsBuilder( fieldName, params ),
537
solrFb.getPreTags( params, fieldName ),
538
solrFb.getPostTags( params, fieldName ),
539
getEncoder( fieldName, params ) );
540
if( snippets != null && snippets.length > 0 )
541
docSummaries.add( fieldName, snippets );
543
alternateField( docSummaries, params, doc, fieldName );
546
private void alternateField( NamedList docSummaries, SolrParams params, Document doc, String fieldName ){
547
String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
548
if (alternateField != null && alternateField.length() > 0) {
549
String[] altTexts = doc.getValues(alternateField);
550
if (altTexts != null && altTexts.length > 0){
551
int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH,0);
552
if( alternateFieldLen <= 0 ){
553
docSummaries.add(fieldName, altTexts);
556
List<String> altList = new ArrayList<String>();
558
for( String altText: altTexts ){
559
altList.add( len + altText.length() > alternateFieldLen ?
560
new String(altText.substring( 0, alternateFieldLen - len )) : altText );
561
len += altText.length();
562
if( len >= alternateFieldLen ) break;
564
docSummaries.add(fieldName, altList);
570
private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {
573
TokenStream ts = schema.getAnalyzer().reusableTokenStream(fieldName, new StringReader(docText));
575
tstream = new TokenOrderingFilter(ts, 10);
580
/** Orders Tokens in a window first by their startOffset ascending.
581
* endOffset is currently ignored.
582
* This is meant to work around fickleness in the highlighter only. It
583
* can mess up token positions and should not be used for indexing or querying.
585
final class TokenOrderingFilter extends TokenFilter {
586
private final int windowSize;
587
private final LinkedList<OrderedToken> queue = new LinkedList<OrderedToken>();
588
private boolean done=false;
589
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
591
protected TokenOrderingFilter(TokenStream input, int windowSize) {
593
this.windowSize = windowSize;
597
public boolean incrementToken() throws IOException {
598
while (!done && queue.size() < windowSize) {
599
if (!input.incrementToken()) {
604
// reverse iterating for better efficiency since we know the
605
// list is already sorted, and most token start offsets will be too.
606
ListIterator<OrderedToken> iter = queue.listIterator(queue.size());
607
while(iter.hasPrevious()) {
608
if (offsetAtt.startOffset() >= iter.previous().startOffset) {
609
// insertion will be before what next() would return (what
610
// we just compared against), so move back one so the insertion
616
OrderedToken ot = new OrderedToken();
617
ot.state = captureState();
618
ot.startOffset = offsetAtt.startOffset();
622
if (queue.isEmpty()) {
625
restoreState(queue.removeFirst().state);
631
// for TokenOrderingFilter, so it can easily sort by startOffset
637
class TermOffsetsTokenStream {
639
TokenStream bufferedTokenStream = null;
640
OffsetAttribute bufferedOffsetAtt;
642
int bufferedStartOffset;
643
int bufferedEndOffset;
647
public TermOffsetsTokenStream( TokenStream tstream ){
648
bufferedTokenStream = tstream;
649
bufferedOffsetAtt = bufferedTokenStream.addAttribute(OffsetAttribute.class);
651
bufferedToken = null;
654
public TokenStream getMultiValuedTokenStream( final int length ){
655
endOffset = startOffset + length;
656
return new MultiValuedStream(length);
659
final class MultiValuedStream extends TokenStream {
660
private final int length;
661
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
663
MultiValuedStream(int length) {
664
super(bufferedTokenStream.cloneAttributes());
665
this.length = length;
669
public boolean incrementToken() throws IOException {
671
if( bufferedToken == null ) {
672
if (!bufferedTokenStream.incrementToken())
674
bufferedToken = bufferedTokenStream.captureState();
675
bufferedStartOffset = bufferedOffsetAtt.startOffset();
676
bufferedEndOffset = bufferedOffsetAtt.endOffset();
679
if( startOffset <= bufferedStartOffset &&
680
bufferedEndOffset <= endOffset ){
681
restoreState(bufferedToken);
682
bufferedToken = null;
683
offsetAtt.setOffset( offsetAtt.startOffset() - startOffset, offsetAtt.endOffset() - startOffset );
686
else if( bufferedEndOffset > endOffset ){
687
startOffset += length + 1;
690
bufferedToken = null;