/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.highlight;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.*;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
37
* Tests some basic functionality of Solr while demonstrating good
38
* Best Practices for using AbstractSolrTestCase
40
public class HighlighterTest extends SolrTestCaseJ4 {
42
private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is " +
43
"is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is " +
44
"is is is is is is is is is is is is is " +
45
"is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated " +
46
"at all--we want two disjoint long fragments.";
49
public static void beforeClass() throws Exception {
50
initCore("solrconfig.xml","schema.xml");
55
public void tearDown() throws Exception {
56
// if you override setUp or tearDown, you better call
57
// the super classes version
63
public void testConfig()
65
SolrHighlighter highlighter = h.getCore().getHighlighter();
67
// Make sure we loaded the one formatter
68
SolrFormatter fmt1 = highlighter.formatters.get( null );
69
SolrFormatter fmt2 = highlighter.formatters.get( "" );
70
assertSame( fmt1, fmt2 );
71
assertTrue( fmt1 instanceof HtmlFormatter );
74
// Make sure we loaded the one formatter
75
SolrFragmenter gap = highlighter.fragmenters.get( "gap" );
76
SolrFragmenter regex = highlighter.fragmenters.get( "regex" );
77
SolrFragmenter frag = highlighter.fragmenters.get( null );
78
assertSame( gap, frag );
79
assertTrue( gap instanceof GapFragmenter );
80
assertTrue( regex instanceof RegexFragmenter );
84
public void testMergeContiguous() throws Exception {
85
HashMap<String,String> args = new HashMap<String,String>();
86
args.put(HighlightParams.HIGHLIGHT, "true");
87
args.put("df", "t_text");
88
args.put(HighlightParams.FIELDS, "");
89
args.put(HighlightParams.SNIPPETS, String.valueOf(4));
90
args.put(HighlightParams.FRAGSIZE, String.valueOf(40));
91
args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
92
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
93
"standard", 0, 200, args);
94
String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
95
"Let us see what happens to long in this case.";
96
String gold = "this is some <em>long</em> text. It has the word <em>long</em> in many places. In fact, it has <em>long</em> on some different fragments. " +
97
"Let us see what happens to <em>long</em> in this case.";
98
assertU(adoc("t_text", input, "id", "1"));
101
assertQ("Merge Contiguous",
102
sumLRF.makeRequest("t_text:long"),
103
"//lst[@name='highlighting']/lst[@name='1']",
104
"//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"
106
args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
107
assertU(adoc("t_text", input, "id", "1"));
110
assertQ("Merge Contiguous",
111
sumLRF.makeRequest("t_text:long"),
112
"//lst[@name='highlighting']/lst[@name='1']",
113
"//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"
116
args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");
117
args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");
118
sumLRF = h.getRequestFactory(
119
"standard", 0, 200, args);
120
assertQ("Merge Contiguous",
121
sumLRF.makeRequest("t_text:long"),
122
"//lst[@name='highlighting']/lst[@name='1']",
123
"//lst[@name='1']/arr[@name='t_text']/str[.='this is some <em>long</em> text. It has']",
124
"//lst[@name='1']/arr[@name='t_text']/str[.=' the word <em>long</em> in many places. In fact, it has']",
125
"//lst[@name='1']/arr[@name='t_text']/str[.=' <em>long</em> on some different fragments. Let us']",
126
"//lst[@name='1']/arr[@name='t_text']/str[.=' see what happens to <em>long</em> in this case.']"
131
public void testTermVecHighlight() {
133
// do summarization using term vectors
134
HashMap<String,String> args = new HashMap<String,String>();
135
args.put("hl", "true");
136
args.put("hl.fl", "tv_text");
137
args.put("hl.snippets", "2");
138
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
139
"standard",0,200,args);
141
assertU(adoc("tv_text", LONG_TEXT,
145
assertQ("Basic summarization",
146
sumLRF.makeRequest("tv_text:long"),
147
"//lst[@name='highlighting']/lst[@name='1']",
148
"//lst[@name='1']/arr[@name='tv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
149
"//arr[@name='tv_text']/str[.=' <em>long</em> fragments.']"
154
public void testTermOffsetsTokenStream() throws Exception {
155
String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
156
Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
157
TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
158
a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) ) );
159
for( String v : multivalued ){
160
TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
161
Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
162
TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) );
163
while (ts1.incrementToken()) {
164
assertTrue(ts2.incrementToken());
165
assertEquals(ts1, ts2);
167
assertFalse(ts2.incrementToken());
172
public void testTermVecMultiValuedHighlight() throws Exception {
174
// do summarization using term vectors on multivalued field
175
HashMap<String,String> args = new HashMap<String,String>();
176
args.put("hl", "true");
177
args.put("hl.fl", "tv_mv_text");
178
args.put("hl.snippets", "2");
179
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
180
"standard",0,200,args);
182
assertU(adoc("tv_mv_text", LONG_TEXT,
183
"tv_mv_text", LONG_TEXT,
187
assertQ("Basic summarization",
188
sumLRF.makeRequest("tv_mv_text:long"),
189
"//lst[@name='highlighting']/lst[@name='1']",
190
"//lst[@name='1']/arr[@name='tv_mv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
191
"//arr[@name='tv_mv_text']/str[.=' <em>long</em> fragments.']"
195
// Variant of testTermVecMultiValuedHighlight to make sure that
196
// more than just the first value of a multi-valued field is
197
// considered for highlighting.
199
public void testTermVecMultiValuedHighlight2() throws Exception {
201
// do summarization using term vectors on multivalued field
202
HashMap<String,String> args = new HashMap<String,String>();
203
args.put("hl", "true");
204
args.put("hl.fl", "tv_mv_text");
205
args.put("hl.snippets", "2");
206
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
207
"standard",0,200,args);
209
String shortText = "short";
210
assertU(adoc("tv_mv_text", shortText,
211
"tv_mv_text", LONG_TEXT,
215
assertQ("Basic summarization",
216
sumLRF.makeRequest("tv_mv_text:long"),
217
"//lst[@name='highlighting']/lst[@name='1']",
218
"//lst[@name='1']/arr[@name='tv_mv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
219
"//arr[@name='tv_mv_text']/str[.=' <em>long</em> fragments.']"
224
public void testDisMaxHighlight() {
226
// same test run through dismax handler
227
HashMap<String,String> args = new HashMap<String,String>();
228
args.put("hl", "true");
229
args.put("hl.fl", "tv_text");
230
args.put("qf", "tv_text");
231
args.put("q.alt", "*:*");
232
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
233
"dismax",0,200,args);
235
assertU(adoc("tv_text", "a long day's night", "id", "1"));
238
assertQ("Basic summarization",
239
sumLRF.makeRequest("long"),
240
"//lst[@name='highlighting']/lst[@name='1']",
241
"//lst[@name='1']/arr[@name='tv_text']/str"
244
// try the same thing without a q param
245
assertQ("Should not explode...", // q.alt should return everything
246
sumLRF.makeRequest( new String[] { null } ), // empty query
247
"//result[@numFound='1']"
252
public void testMultiValueAnalysisHighlight() {
254
// do summarization using re-analysis of the field
255
HashMap<String,String> args = new HashMap<String,String>();
256
args.put("hl", "true");
257
args.put("hl.fl", "textgap");
258
args.put("df", "textgap");
259
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
260
"standard", 0, 200, args);
262
assertU(adoc("textgap", "first entry hasnt queryword",
263
"textgap", "second entry has queryword long",
267
assertQ("Basic summarization",
268
sumLRF.makeRequest("long"),
269
"//lst[@name='highlighting']/lst[@name='1']",
270
"//lst[@name='1']/arr[@name='textgap']/str"
276
public void testMultiValueBestFragmentHighlight() {
277
HashMap<String,String> args = new HashMap<String,String>();
278
args.put("hl", "true");
279
args.put("hl.fl", "textgap");
280
args.put("df", "textgap");
281
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
282
"standard", 0, 200, args);
284
assertU(adoc("textgap", "first entry has one word foo",
285
"textgap", "second entry has both words foo bar",
289
assertQ("Best fragment summarization",
290
sumLRF.makeRequest("foo bar"),
291
"//lst[@name='highlighting']/lst[@name='1']",
292
"//lst[@name='1']/arr[@name='textgap']/str[.=\'second entry has both words <em>foo</em> <em>bar</em>\']"
297
public void testDefaultFieldHighlight() {
299
// do summarization using re-analysis of the field
300
HashMap<String,String> args = new HashMap<String,String>();
301
args.put("hl", "true");
302
args.put("df", "t_text");
303
args.put("hl.fl", "");
304
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
305
"standard", 0, 200, args);
307
assertU(adoc("t_text", "a long day's night", "id", "1"));
310
assertQ("Basic summarization",
311
sumLRF.makeRequest("long"),
312
"//lst[@name='highlighting']/lst[@name='1']",
313
"//lst[@name='1']/arr[@name='t_text']/str"
320
public void testHighlightDisabled() {
322
// ensure highlighting can be explicitly disabled
323
HashMap<String,String> args = new HashMap<String,String>();
324
args.put("hl", "false");
325
args.put("hl.fl", "t_text");
326
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
327
"standard", 0, 200, args);
329
assertU(adoc("t_text", "a long day's night", "id", "1"));
332
assertQ("Basic summarization",
333
sumLRF.makeRequest("t_text:long"), "not(//lst[@name='highlighting'])");
338
public void testTwoFieldHighlight() {
340
// do summarization using re-analysis of the field
341
HashMap<String,String> args = new HashMap<String,String>();
342
args.put("hl", "true");
343
args.put("hl.fl", "t_text tv_text");
344
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
345
"standard", 0, 200, args);
347
assertU(adoc("t_text", "a long day's night", "id", "1",
348
"tv_text", "a long night's day"));
351
assertQ("Basic summarization",
352
sumLRF.makeRequest("t_text:long"),
353
"//lst[@name='highlighting']/lst[@name='1']",
354
"//lst[@name='1']/arr[@name='t_text']/str",
355
"//lst[@name='1']/arr[@name='tv_text']/str"
360
public void testFieldMatch()
362
assertU(adoc("t_text1", "random words for highlighting tests", "id", "1",
363
"t_text2", "more random words for second field"));
367
HashMap<String,String> args = new HashMap<String,String>();
368
args.put("hl", "true");
369
args.put("hl.fl", "t_text1 t_text2");
371
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
372
"standard", 0, 200, args);
373
// default should highlight both random and words in both fields
374
assertQ("Test Default",
375
sumLRF.makeRequest("t_text1:random OR t_text2:words"),
376
"//lst[@name='highlighting']/lst[@name='1']",
377
"//lst[@name='1']/arr[@name='t_text1']/str[.='<em>random</em> <em>words</em> for highlighting tests']",
378
"//lst[@name='1']/arr[@name='t_text2']/str[.='more <em>random</em> <em>words</em> for second field']"
381
// requireFieldMatch=true - highlighting should only occur if term matched in that field
382
args.put("hl.requireFieldMatch", "true");
383
sumLRF = h.getRequestFactory(
384
"standard", 0, 200, args);
385
assertQ("Test RequireFieldMatch",
386
sumLRF.makeRequest("t_text1:random OR t_text2:words"),
387
"//lst[@name='highlighting']/lst[@name='1']",
388
"//lst[@name='1']/arr[@name='t_text1']/str[.='<em>random</em> words for highlighting tests']",
389
"//lst[@name='1']/arr[@name='t_text2']/str[.='more random <em>words</em> for second field']"
392
// test case for un-optimized index
393
assertU(adoc("t_text1", "random words for highlighting tests", "id", "2",
394
"t_text2", "more random words for second field"));
397
sumLRF = h.getRequestFactory(
398
"standard", 0, 200, args);
399
assertQ("Test RequireFieldMatch on un-optimized index",
400
sumLRF.makeRequest("t_text1:random OR t_text2:words"),
401
"//lst[@name='highlighting']/lst[@name='2']",
402
"//lst[@name='2']/arr[@name='t_text1']/str[.='<em>random</em> words for highlighting tests']",
403
"//lst[@name='2']/arr[@name='t_text2']/str[.='more random <em>words</em> for second field']"
408
public void testCustomSimpleFormatterHighlight() {
410
// do summarization using a custom formatter
411
HashMap<String,String> args = new HashMap<String,String>();
412
args.put("hl", "true");
413
args.put("hl.fl", "t_text");
414
args.put("hl.simple.pre","<B>");
415
args.put("hl.simple.post","</B>");
416
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
417
"standard", 0, 200, args);
419
assertU(adoc("t_text", "a long days night", "id", "1"));
422
assertQ("Basic summarization",
423
sumLRF.makeRequest("t_text:long"),
424
"//lst[@name='highlighting']/lst[@name='1']",
425
"//lst[@name='1']/arr[@name='t_text']/str[.='a <B>long</B> days night']"
428
// test a per-field override
429
args.put("f.t_text.hl.simple.pre","<I>");
430
args.put("f.t_text.hl.simple.post","</I>");
431
sumLRF = h.getRequestFactory(
432
"standard", 0, 200, args);
433
assertQ("Basic summarization",
434
sumLRF.makeRequest("t_text:long"),
435
"//lst[@name='highlighting']/lst[@name='1']",
436
"//lst[@name='1']/arr[@name='t_text']/str[.='a <I>long</I> days night']"
442
public void testLongFragment() {
444
HashMap<String,String> args = new HashMap<String,String>();
445
args.put("hl", "true");
446
args.put("hl.fl", "tv_text");
447
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
448
"standard", 0, 200, args);
452
"junit: [mkdir] Created dir: /home/klaas/worio/backend/trunk/build-src/solr-nightly/build/test-results [junit] Running org.apache.solr.BasicFunctionalityTest [junit] Tests run: 7, Failures: 0, Errors: 0, Time elapsed: 5.36 sec [junit] Running org.apache.solr.ConvertedLegacyTest [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 8.268 sec [junit] Running org.apache.solr.DisMaxRequestHandlerTest [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 1.56 sec [junit] Running org.apache.solr.HighlighterTest [junit] Tests run: 7, Failures: 0, Errors: 0, Time elapsed: 4.979 sec [junit] Running org.apache.solr.OutputWriterTest [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.797 sec [junit] Running org.apache.solr.SampleTest [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 1.021 sec [junit] Running org.apache.solr.analysis.TestBufferedTokenStream [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.05 sec [junit] Running org.apache.solr.analysis.TestRemoveDuplicatesTokenFilter [junit] Tests run: 3, Failures: 0, Errors: 0, Time elapsed: 0.054 sec [junit] Running org.apache.solr.analysis.TestSynonymFilter [junit] Tests run: 6, Failures: 0, Errors: 0, Time elapsed: 0.081 sec [junit] Running org.apache.solr.analysis.TestWordDelimiterFilter [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 1.714 sec [junit] Running org.apache.solr.search.TestDocSet [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 0.788 sec [junit] Running org.apache.solr.util.SolrPluginUtilsTest [junit] Tests run: 5, Failures: 0, Errors: 0, Time elapsed: 3.519 sec [junit] Running org.apache.solr.util.TestOpenBitSet [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.533 sec";
453
assertU(adoc("tv_text", text, "id", "1"));
456
assertQ("Basic summarization",
457
sumLRF.makeRequest("tv_text:dir"),
458
"//lst[@name='highlighting']/lst[@name='1']",
459
"//lst[@name='1']/arr[@name='tv_text']/str"
464
public void testMaxChars() {
465
HashMap<String,String> args = new HashMap<String,String>();
466
args.put("fl", "id score");
467
args.put("hl", "true");
468
args.put("hl.snippets", "10");
469
args.put("hl.fl", "t_text");
470
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
471
"standard", 0, 200, args);
474
assertU(adoc("t_text", LONG_TEXT, "id", "1"));
477
assertQ("token at start of text",
478
sumLRF.makeRequest("t_text:disjoint"),
479
"//lst[@name='highlighting']/lst[@name='1']",
480
"//lst[@name='1']/arr[count(str)=1]"
482
args.put("hl.maxAnalyzedChars", "20");
483
sumLRF = h.getRequestFactory("standard", 0, 200, args);
484
assertQ("token at end of text",
485
sumLRF.makeRequest("t_text:disjoint"),
486
"//lst[@name='highlighting']/lst[@name='1']",
487
"//lst[@name='1'][not(*)]"
489
args.put("hl.maxAnalyzedChars", "-1");
490
sumLRF = h.getRequestFactory("standard", 0, 200, args);
491
assertQ("token at start of text",
492
sumLRF.makeRequest("t_text:disjoint"),
493
"//lst[@name='highlighting']/lst[@name='1']",
494
"//lst[@name='1']/arr[count(str)=1]"
499
public void testRegexFragmenter() {
500
HashMap<String,String> args = new HashMap<String,String>();
501
args.put("fl", "id score");
502
args.put("hl", "true");
503
args.put("hl.snippets", "10");
504
args.put("hl.fl", "t_text");
505
args.put("hl.fragmenter", "regex");
506
args.put("hl.regex.pattern", "[-\\w ,\"']{20,200}");
507
args.put("hl.regex.slop", ".9");
508
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
509
"standard", 0, 200, args);
511
String t = "This is an example of a sentence. Another example \"sentence\" with " +
512
"special characters\nand a line-break! Miscellaneous character like ^ are " +
513
"unknowns and end up being bad example s of sentences? I wonder how " +
514
"slashes/other punctuation fare in these examples?";
515
assertU(adoc("t_text", t, "id", "1"));
518
assertQ("regex fragmenter",
519
sumLRF.makeRequest("t_text:example"),
520
"//lst[@name='highlighting']/lst[@name='1']",
521
"//arr/str[.='This is an <em>example</em> of a sentence']",
522
"//arr/str[.='. Another <em>example</em> \"sentence\" with special characters\nand a line-break']",
523
"//arr/str[.=' ^ are unknowns and end up being bad <em>example</em> s of sentences']",
524
"//arr/str[.='/other punctuation fare in these <em>examples</em>?']"
526
// try with some punctuation included
527
args.put("hl.regex.pattern", "[-\\w ,^/\\n\"']{20,200}");
528
sumLRF = h.getRequestFactory("standard", 0, 200, args);
529
assertQ("regex fragmenter 2",
530
sumLRF.makeRequest("t_text:example"),
531
"//lst[@name='highlighting']/lst[@name='1']",
532
"//arr/str[.='This is an <em>example</em> of a sentence']",
533
"//arr/str[.='. Another <em>example</em> \"sentence\" with special characters\nand a line-break']",
534
"//arr/str[.='! Miscellaneous character like ^ are unknowns and end up being bad <em>example</em> s of sentences']",
535
"//arr/str[.='? I wonder how slashes/other punctuation fare in these <em>examples</em>?']"
540
public void testVariableFragsize() {
541
assertU(adoc("tv_text", "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all",
547
HashMap<String,String> args = new HashMap<String,String>();
548
args.put("hl", "true");
549
args.put("hl.fl", "tv_text");
550
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
551
"standard", 0, 200, args);
552
assertQ("Basic summarization",
553
sumLRF.makeRequest("tv_text:long"),
554
"//lst[@name='highlighting']/lst[@name='1']",
555
"//lst[@name='1']/arr[@name='tv_text']/str[.='a <em>long</em> days night this should be a piece of text which']"
559
args.put("hl.fragsize","25");
560
sumLRF = h.getRequestFactory(
561
"standard", 0, 200, args);
562
assertQ("Basic summarization",
563
sumLRF.makeRequest("tv_text:long"),
564
"//lst[@name='highlighting']/lst[@name='1']",
565
"//lst[@name='1']/arr[@name='tv_text']/str[.='a <em>long</em> days night']"
568
// 0 - NullFragmenter
569
args.put("hl.fragsize","0");
570
sumLRF = h.getRequestFactory(
571
"standard", 0, 200, args);
572
assertQ("Basic summarization",
573
sumLRF.makeRequest("tv_text:long"),
574
"//lst[@name='highlighting']/lst[@name='1']",
575
"//lst[@name='1']/arr[@name='tv_text']/str[.='a <em>long</em> days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all']"
580
public void testAlternateSummary() {
582
assertU(adoc("tv_text", "keyword is only here",
583
"t_text", "a piece of text to be substituted",
590
HashMap<String,String> args = new HashMap<String,String>();
591
args.put("hl", "true");
592
args.put("hl.fragsize","0");
593
args.put("hl.fl", "t_text");
594
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
595
"standard", 0, 200, args);
598
assertQ("Alternate summarization",
599
sumLRF.makeRequest("tv_text:keyword"),
600
"//lst[@name='highlighting']/lst[@name='1']",
601
"//lst[@name='highlighting']/lst[@name='1' and count(*)=0]"
605
args.put("hl.alternateField", "foo_t");
606
sumLRF = h.getRequestFactory("standard", 0, 200, args);
607
assertQ("Alternate summarization",
608
sumLRF.makeRequest("tv_text:keyword"),
609
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
610
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='hi']"
613
// with an alternate + max length
614
args.put("hl.alternateField", "t_text");
615
args.put("hl.maxAlternateFieldLength", "15");
616
sumLRF = h.getRequestFactory("standard", 0, 200, args);
617
assertQ("Alternate summarization",
618
sumLRF.makeRequest("tv_text:keyword"),
619
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
620
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='a piece of text']"
625
public void testPhraseHighlighter() {
626
HashMap<String,String> args = new HashMap<String,String>();
627
args.put("hl", "true");
628
args.put("hl.fl", "t_text");
629
args.put("hl.fragsize", "40");
630
args.put("hl.snippets", "10");
631
args.put("hl.usePhraseHighlighter", "false");
633
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
634
"standard", 0, 200, args);
636
// String borrowed from Lucene's HighlighterTest
637
String t = "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy";
639
assertU(adoc("t_text", t, "id", "1"));
643
String oldHighlight1 = "//lst[@name='1']/arr[@name='t_text']/str[.='This piece of <em>text</em> <em>refers</em> to Kennedy']";
644
String oldHighlight2 = "//lst[@name='1']/arr[@name='t_text']/str[.=' at the beginning then has a longer piece of <em>text</em>']";
645
String oldHighlight3 = "//lst[@name='1']/arr[@name='t_text']/str[.=' with another <em>reference</em> to Kennedy']";
646
String newHighlight1 = "//lst[@name='1']/arr[@name='t_text']/str[.='This piece of <em>text</em> <em>refers</em> to Kennedy']";
648
// check if old functionality is still the same
649
assertQ("Phrase highlighting - old",
650
sumLRF.makeRequest("t_text:\"text refers\""),
651
"//lst[@name='highlighting']/lst[@name='1']",
652
oldHighlight1, oldHighlight2, oldHighlight3
655
assertQ("Phrase highlighting - old",
656
sumLRF.makeRequest("t_text:text refers"),
657
"//lst[@name='highlighting']/lst[@name='1']",
658
oldHighlight1, oldHighlight2, oldHighlight3
661
// now check if Lucene-794 highlighting works as expected
662
args.put("hl.usePhraseHighlighter", "true");
664
sumLRF = h.getRequestFactory("standard", 0, 200, args);
666
// check phrase highlighting
667
assertQ("Phrase highlighting - Lucene-794",
668
sumLRF.makeRequest("t_text:\"text refers\""),
669
"//lst[@name='highlighting']/lst[@name='1']",
673
// non phrase queries should be highlighted as they were before this fix
674
assertQ("Phrase highlighting - Lucene-794",
675
sumLRF.makeRequest("t_text:text refers"),
676
"//lst[@name='highlighting']/lst[@name='1']",
677
oldHighlight1, oldHighlight2, oldHighlight3
682
public void testGetHighlightFields() {
683
HashMap<String, String> args = new HashMap<String, String>();
684
args.put("fl", "id score");
685
args.put("hl", "true");
686
args.put("hl.fl", "t*");
688
assertU(adoc("id", "0", "title", "test", // static stored
689
"text", "test", // static not stored
690
"foo_s", "test", // dynamic stored
691
"foo_sI", "test", // dynamic not stored
692
"weight", "1.0")); // stored but not text
696
TestHarness.LocalRequestFactory lrf = h.getRequestFactory("standard", 0,
699
SolrQueryRequest request = lrf.makeRequest("test");
700
SolrHighlighter highlighter = request.getCore().getHighlighter();
701
List<String> highlightFieldNames = Arrays.asList(highlighter
702
.getHighlightFields(null, request, new String[] {}));
703
assertTrue("Expected to highlight on field \"title\"", highlightFieldNames
705
assertFalse("Expected to not highlight on field \"text\"",
706
highlightFieldNames.contains("text"));
707
assertFalse("Expected to not highlight on field \"weight\"",
708
highlightFieldNames.contains("weight"));
711
args.put("hl.fl", "foo_*");
712
lrf = h.getRequestFactory("standard", 0, 10, args);
713
request = lrf.makeRequest("test");
714
highlighter = request.getCore().getHighlighter();
715
highlightFieldNames = Arrays.asList(highlighter.getHighlightFields(null,
716
request, new String[] {}));
717
assertEquals("Expected one field to highlight on", 1, highlightFieldNames
719
assertEquals("Expected to highlight on field \"foo_s\"", "foo_s",
720
highlightFieldNames.get(0));
725
public void testDefaultFieldPrefixWildcardHighlight() {
727
// do summarization using re-analysis of the field
728
HashMap<String,String> args = new HashMap<String,String>();
729
args.put("hl", "true");
730
args.put("df", "t_text");
731
args.put("hl.fl", "");
732
args.put("hl.usePhraseHighlighter", "true");
733
args.put("hl.highlightMultiTerm", "true");
734
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
735
"standard", 0, 200, args);
737
assertU(adoc("t_text", "a long day's night", "id", "1"));
740
assertQ("Basic summarization",
741
sumLRF.makeRequest("lon*"),
742
"//lst[@name='highlighting']/lst[@name='1']",
743
"//lst[@name='1']/arr[@name='t_text']/str"
749
public void testDefaultFieldNonPrefixWildcardHighlight() {
751
// do summarization using re-analysis of the field
752
HashMap<String,String> args = new HashMap<String,String>();
753
args.put("hl", "true");
754
args.put("df", "t_text");
755
args.put("hl.fl", "");
756
args.put("hl.usePhraseHighlighter", "true");
757
args.put("hl.highlightMultiTerm", "true");
758
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
759
"standard", 0, 200, args);
761
assertU(adoc("t_text", "a long day's night", "id", "1"));
764
assertQ("Basic summarization",
765
sumLRF.makeRequest("l*g"),
766
"//lst[@name='highlighting']/lst[@name='1']",
767
"//lst[@name='1']/arr[@name='t_text']/str"
772
public void testSubwordWildcardHighlight() {
773
assertU(adoc("subword", "lorem PowerShot.com ipsum", "id", "1"));
775
assertQ("subword wildcard highlighting",
776
req("q", "subword:pow*", "hl", "true", "hl.fl", "subword"),
777
"//lst[@name='highlighting']/lst[@name='1']" +
778
"/arr[@name='subword']/str='lorem <em>PowerShot.com</em> ipsum'");
781
public void testSubwordWildcardHighlightWithTermOffsets() {
782
assertU(adoc("subword_offsets", "lorem PowerShot.com ipsum", "id", "1"));
784
assertQ("subword wildcard highlighting",
785
req("q", "subword_offsets:pow*", "hl", "true", "hl.fl", "subword_offsets"),
786
"//lst[@name='highlighting']/lst[@name='1']" +
787
"/arr[@name='subword_offsets']/str='lorem <em>PowerShot.com</em> ipsum'");
790
public void testSubwordWildcardHighlightWithTermOffsets2() {
791
assertU(adoc("subword_offsets", "lorem PowerShot ipsum", "id", "1"));
793
assertQ("subword wildcard highlighting",
794
req("q", "subword_offsets:pow*", "hl", "true", "hl.fl", "subword_offsets"),
795
"//lst[@name='highlighting']/lst[@name='1']" +
796
"/arr[@name='subword_offsets']/str='lorem <em>PowerShot</em> ipsum'");
799
public void testHlQParameter() {
800
assertU(adoc("title", "Apache Software Foundation", "id", "1"));
802
assertQ("hl.q parameter overrides q parameter",
803
req("q", "title:Apache", "hl", "true", "hl.fl", "title", "hl.q", "title:Software"),
804
"//lst[@name='highlighting']/lst[@name='1']" +
805
"/arr[@name='title']/str='Apache <em>Software</em> Foundation'");
806
assertQ("hl.q parameter overrides q parameter",
807
req("q", "title:Apache", "hl", "true", "hl.fl", "title", "hl.q", "{!v=$qq}", "qq", "title:Foundation"),
808
"//lst[@name='highlighting']/lst[@name='1']" +
809
"/arr[@name='title']/str='Apache Software <em>Foundation</em>'");