1
package org.apache.lucene.search.vectorhighlight;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20
import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.AttributeImpl;
33
public class IndexTimeSynonymTest extends AbstractTestCase {
35
public void testFieldTermStackIndex1wSearch1term() throws Exception {
38
FieldQuery fq = new FieldQuery( tq( "Mac" ), true, true );
39
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
40
assertEquals( 1, stack.termList.size() );
41
assertEquals( "Mac(11,20,3)", stack.pop().toString() );
44
public void testFieldTermStackIndex1wSearch2terms() throws Exception {
47
BooleanQuery bq = new BooleanQuery();
48
bq.add( tq( "Mac" ), Occur.SHOULD );
49
bq.add( tq( "MacBook" ), Occur.SHOULD );
50
FieldQuery fq = new FieldQuery( bq, true, true );
51
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
52
assertEquals( 2, stack.termList.size() );
53
Set<String> expectedSet = new HashSet<String>();
54
expectedSet.add( "Mac(11,20,3)" );
55
expectedSet.add( "MacBook(11,20,3)" );
56
assertTrue( expectedSet.contains( stack.pop().toString() ) );
57
assertTrue( expectedSet.contains( stack.pop().toString() ) );
60
public void testFieldTermStackIndex1w2wSearch1term() throws Exception {
63
FieldQuery fq = new FieldQuery( tq( "pc" ), true, true );
64
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
65
assertEquals( 1, stack.termList.size() );
66
assertEquals( "pc(3,5,1)", stack.pop().toString() );
69
public void testFieldTermStackIndex1w2wSearch1phrase() throws Exception {
72
FieldQuery fq = new FieldQuery( pqF( "personal", "computer" ), true, true );
73
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
74
assertEquals( 2, stack.termList.size() );
75
assertEquals( "personal(3,5,1)", stack.pop().toString() );
76
assertEquals( "computer(3,5,2)", stack.pop().toString() );
79
public void testFieldTermStackIndex1w2wSearch1partial() throws Exception {
82
FieldQuery fq = new FieldQuery( tq( "computer" ), true, true );
83
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
84
assertEquals( 1, stack.termList.size() );
85
assertEquals( "computer(3,5,2)", stack.pop().toString() );
88
public void testFieldTermStackIndex1w2wSearch1term1phrase() throws Exception {
91
BooleanQuery bq = new BooleanQuery();
92
bq.add( tq( "pc" ), Occur.SHOULD );
93
bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
94
FieldQuery fq = new FieldQuery( bq, true, true );
95
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
96
assertEquals( 3, stack.termList.size() );
97
Set<String> expectedSet = new HashSet<String>();
98
expectedSet.add( "pc(3,5,1)" );
99
expectedSet.add( "personal(3,5,1)" );
100
assertTrue( expectedSet.contains( stack.pop().toString() ) );
101
assertTrue( expectedSet.contains( stack.pop().toString() ) );
102
assertEquals( "computer(3,5,2)", stack.pop().toString() );
105
public void testFieldTermStackIndex2w1wSearch1term() throws Exception {
108
FieldQuery fq = new FieldQuery( tq( "pc" ), true, true );
109
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
110
assertEquals( 1, stack.termList.size() );
111
assertEquals( "pc(3,20,1)", stack.pop().toString() );
114
public void testFieldTermStackIndex2w1wSearch1phrase() throws Exception {
117
FieldQuery fq = new FieldQuery( pqF( "personal", "computer" ), true, true );
118
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
119
assertEquals( 2, stack.termList.size() );
120
assertEquals( "personal(3,20,1)", stack.pop().toString() );
121
assertEquals( "computer(3,20,2)", stack.pop().toString() );
124
public void testFieldTermStackIndex2w1wSearch1partial() throws Exception {
127
FieldQuery fq = new FieldQuery( tq( "computer" ), true, true );
128
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
129
assertEquals( 1, stack.termList.size() );
130
assertEquals( "computer(3,20,2)", stack.pop().toString() );
133
public void testFieldTermStackIndex2w1wSearch1term1phrase() throws Exception {
136
BooleanQuery bq = new BooleanQuery();
137
bq.add( tq( "pc" ), Occur.SHOULD );
138
bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
139
FieldQuery fq = new FieldQuery( bq, true, true );
140
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
141
assertEquals( 3, stack.termList.size() );
142
Set<String> expectedSet = new HashSet<String>();
143
expectedSet.add( "pc(3,20,1)" );
144
expectedSet.add( "personal(3,20,1)" );
145
assertTrue( expectedSet.contains( stack.pop().toString() ) );
146
assertTrue( expectedSet.contains( stack.pop().toString() ) );
147
assertEquals( "computer(3,20,2)", stack.pop().toString() );
150
public void testFieldPhraseListIndex1w2wSearch1phrase() throws Exception {
153
FieldQuery fq = new FieldQuery( pqF( "personal", "computer" ), true, true );
154
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
155
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
156
assertEquals( 1, fpl.phraseList.size() );
157
assertEquals( "personalcomputer(1.0)((3,5))", fpl.phraseList.get( 0 ).toString() );
158
assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
159
assertEquals( 5, fpl.phraseList.get( 0 ).getEndOffset() );
162
public void testFieldPhraseListIndex1w2wSearch1partial() throws Exception {
165
FieldQuery fq = new FieldQuery( tq( "computer" ), true, true );
166
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
167
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
168
assertEquals( 1, fpl.phraseList.size() );
169
assertEquals( "computer(1.0)((3,5))", fpl.phraseList.get( 0 ).toString() );
170
assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
171
assertEquals( 5, fpl.phraseList.get( 0 ).getEndOffset() );
174
public void testFieldPhraseListIndex1w2wSearch1term1phrase() throws Exception {
177
BooleanQuery bq = new BooleanQuery();
178
bq.add( tq( "pc" ), Occur.SHOULD );
179
bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
180
FieldQuery fq = new FieldQuery( bq, true, true );
181
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
182
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
183
assertEquals( 1, fpl.phraseList.size() );
184
assertTrue( fpl.phraseList.get( 0 ).toString().indexOf( "(1.0)((3,5))" ) > 0 );
185
assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
186
assertEquals( 5, fpl.phraseList.get( 0 ).getEndOffset() );
189
public void testFieldPhraseListIndex2w1wSearch1term() throws Exception {
192
FieldQuery fq = new FieldQuery( tq( "pc" ), true, true );
193
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
194
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
195
assertEquals( 1, fpl.phraseList.size() );
196
assertEquals( "pc(1.0)((3,20))", fpl.phraseList.get( 0 ).toString() );
197
assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
198
assertEquals( 20, fpl.phraseList.get( 0 ).getEndOffset() );
201
public void testFieldPhraseListIndex2w1wSearch1phrase() throws Exception {
204
FieldQuery fq = new FieldQuery( pqF( "personal", "computer" ), true, true );
205
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
206
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
207
assertEquals( 1, fpl.phraseList.size() );
208
assertEquals( "personalcomputer(1.0)((3,20))", fpl.phraseList.get( 0 ).toString() );
209
assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
210
assertEquals( 20, fpl.phraseList.get( 0 ).getEndOffset() );
213
public void testFieldPhraseListIndex2w1wSearch1partial() throws Exception {
216
FieldQuery fq = new FieldQuery( tq( "computer" ), true, true );
217
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
218
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
219
assertEquals( 1, fpl.phraseList.size() );
220
assertEquals( "computer(1.0)((3,20))", fpl.phraseList.get( 0 ).toString() );
221
assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
222
assertEquals( 20, fpl.phraseList.get( 0 ).getEndOffset() );
225
public void testFieldPhraseListIndex2w1wSearch1term1phrase() throws Exception {
228
BooleanQuery bq = new BooleanQuery();
229
bq.add( tq( "pc" ), Occur.SHOULD );
230
bq.add( pqF( "personal", "computer" ), Occur.SHOULD );
231
FieldQuery fq = new FieldQuery( bq, true, true );
232
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
233
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
234
assertEquals( 1, fpl.phraseList.size() );
235
assertTrue( fpl.phraseList.get( 0 ).toString().indexOf( "(1.0)((3,20))" ) > 0 );
236
assertEquals( 3, fpl.phraseList.get( 0 ).getStartOffset() );
237
assertEquals( 20, fpl.phraseList.get( 0 ).getEndOffset() );
240
private void makeIndex1w() throws Exception {
242
// 012345678901234567890
243
// I'll buy a Macintosh
247
makeSynonymIndex( "I'll buy a Macintosh",
251
t("Macintosh",11,20),t("Mac",11,20,0),t("MacBook",11,20,0));
254
private void makeIndex1w2w() throws Exception {
260
makeSynonymIndex( "My pc was broken",
262
t("pc",3,5),t("personal",3,5,0),t("computer",3,5),
267
private void makeIndex2w1w() throws Exception {
268
// 1111111111222222222233
269
// 01234567890123456789012345678901
270
// My personal computer was broken
273
makeSynonymIndex( "My personal computer was broken",
275
t("personal",3,20),t("pc",3,20,0),t("computer",3,20),
280
void makeSynonymIndex( String value, Token... tokens ) throws Exception {
281
Analyzer analyzer = new TokenArrayAnalyzer( tokens );
282
make1dmfIndex( analyzer, value );
285
public static Token t( String text, int startOffset, int endOffset ){
286
return t( text, startOffset, endOffset, 1 );
289
public static Token t( String text, int startOffset, int endOffset, int positionIncrement ){
290
Token token = new Token( text, startOffset, endOffset );
291
token.setPositionIncrement( positionIncrement );
295
public static final class TokenArrayAnalyzer extends Analyzer {
297
public TokenArrayAnalyzer( Token... tokens ){
298
this.tokens = tokens;
302
public TokenStream tokenStream(String fieldName, Reader reader) {
303
TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
304
final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
308
public boolean incrementToken() throws IOException {
309
if( p >= tokens.length ) return false;
311
tokens[p++].copyTo(reusableToken);