// Origin: ~slub.team/goobi-indexserver/3.x

/**
 * Shared sample field value. As the text itself says, it is meant to be long enough to
 * yield several fragments, with the word "long" near the start and again near the end so
 * that two disjoint fragments can be expected by the tests that index it.
 * Declared final because it is a constant that is never reassigned.
 */
private static final String LONG_TEXT =
    "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is " +
    "is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is " +
    "is is is is is is is is is is is is is " +
    "is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated " +
    "at all--we want two disjoint long fragments.";

/**
 * Class-level fixture: calls {@code initCore} with the configuration and schema file names
 * used by every test in this class.
 *
 * @throws Exception if {@code initCore} fails
 */
@BeforeClass
public static void beforeClass() throws Exception {
  final String configFile = "solrconfig.xml";
  final String schemaFile = "schema.xml";
  initCore(configFile, schemaFile);
}

/**
 * Per-test cleanup: calls {@code clearIndex()} and then hands over to the superclass.
 *
 * @throws Exception if either cleanup step fails
 */
@After
@Override
public void tearDown() throws Exception {
  // Our own cleanup goes first ...
  clearIndex();
  // ... and an overriding setUp/tearDown must always chain to the superclass version.
  super.tearDown();
}

/**
 * Checks which formatters and fragmenters the core's highlighter has registered:
 * the formatter under the {@code null} key and the one under {@code ""} are the same
 * {@link HtmlFormatter} instance, the {@code null}-key fragmenter is the "gap" fragmenter,
 * and "gap"/"regex" map to {@link GapFragmenter}/{@link RegexFragmenter}.
 */
@Test
public void testConfig() {
  final SolrHighlighter hl = h.getCore().getHighlighter();

  // Formatters: the null key and the empty name must resolve to one and the same instance.
  final SolrFormatter nullKeyFormatter = hl.formatters.get(null);
  final SolrFormatter emptyNameFormatter = hl.formatters.get("");
  assertSame(nullKeyFormatter, emptyNameFormatter);
  assertTrue(nullKeyFormatter instanceof HtmlFormatter);

  // Fragmenters: the null key must resolve to the "gap" fragmenter.
  final SolrFragmenter nullKeyFragmenter = hl.fragmenters.get(null);
  final SolrFragmenter gapFragmenter = hl.fragmenters.get("gap");
  final SolrFragmenter regexFragmenter = hl.fragmenters.get("regex");
  assertSame(gapFragmenter, nullKeyFragmenter);
  assertTrue(gapFragmenter instanceof GapFragmenter);
  assertTrue(regexFragmenter instanceof RegexFragmenter);
}

@Test

public void testMergeContiguous() throws Exception {

HashMap<String,String> args = new HashMap<String,String>();

args.put(HighlightParams.HIGHLIGHT, "true");

args.put("df", "t_text");

args.put(HighlightParams.FIELDS, "");

args.put(HighlightParams.SNIPPETS, String.valueOf(4));

args.put(HighlightParams.FRAGSIZE, String.valueOf(40));

args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

"standard", 0, 200, args);

String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +

"Let us see what happens to long in this case.";

String gold = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +

"Let us see what happens to long in this case.";

assertU(adoc("t_text", input, "id", "1"));

assertU(commit());

100

assertU(optimize());

101

assertQ("Merge Contiguous",

102

sumLRF.makeRequest("t_text:long"),

103

"//lst[@name='highlighting']/lst[@name='1']",

104

"//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"

105

);

106

args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");

107

assertU(adoc("t_text", input, "id", "1"));

108

assertU(commit());

109

assertU(optimize());

110

assertQ("Merge Contiguous",

111

sumLRF.makeRequest("t_text:long"),

112

"//lst[@name='highlighting']/lst[@name='1']",

113

"//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"

114

);

115

116

args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");

117

args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");

118

sumLRF = h.getRequestFactory(

119

"standard", 0, 200, args);

120

assertQ("Merge Contiguous",

121

sumLRF.makeRequest("t_text:long"),

122

"//lst[@name='highlighting']/lst[@name='1']",

123

"//lst[@name='1']/arr[@name='t_text']/str[.='this is some long text. It has']",

124

"//lst[@name='1']/arr[@name='t_text']/str[.=' the word long in many places. In fact, it has']",

125

"//lst[@name='1']/arr[@name='t_text']/str[.=' long on some different fragments. Let us']",

126

"//lst[@name='1']/arr[@name='t_text']/str[.=' see what happens to long in this case.']"

127

);

128

}

129

130

@Test

131

public void testTermVecHighlight() {

132

133

// do summarization using term vectors

134

HashMap<String,String> args = new HashMap<String,String>();

135

args.put("hl", "true");

136

args.put("hl.fl", "tv_text");

137

args.put("hl.snippets", "2");

138

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

139

"standard",0,200,args);

140

141

assertU(adoc("tv_text", LONG_TEXT,

142

"id", "1"));

143

assertU(commit());

144

assertU(optimize());

145

assertQ("Basic summarization",

146

sumLRF.makeRequest("tv_text:long"),

147

"//lst[@name='highlighting']/lst[@name='1']",

148

"//lst[@name='1']/arr[@name='tv_text']/str[.='a long days night this should be a piece of text which']",

149

"//arr[@name='tv_text']/str[.=' long fragments.']"

150

);

151

}

152

153

@Test

154

public void testTermOffsetsTokenStream() throws Exception {

155

String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };

156

Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);

157

TermOffsetsTokenStream tots = new TermOffsetsTokenStream(

158

a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) ) );

159

for( String v : multivalued ){

160

TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );

161

Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);

162

TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) );

163

while (ts1.incrementToken()) {

164

assertTrue(ts2.incrementToken());

165

assertEquals(ts1, ts2);

166

}

167

assertFalse(ts2.incrementToken());

168

}

169

}

170

171

@Test

172

public void testTermVecMultiValuedHighlight() throws Exception {

173

174

// do summarization using term vectors on multivalued field

175

HashMap<String,String> args = new HashMap<String,String>();

176

args.put("hl", "true");

177

args.put("hl.fl", "tv_mv_text");

178

args.put("hl.snippets", "2");

179

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

180

"standard",0,200,args);

181

182

assertU(adoc("tv_mv_text", LONG_TEXT,

183

"tv_mv_text", LONG_TEXT,

184

"id", "1"));

185

assertU(commit());

186

assertU(optimize());

187

assertQ("Basic summarization",

188

sumLRF.makeRequest("tv_mv_text:long"),

189

"//lst[@name='highlighting']/lst[@name='1']",

190

"//lst[@name='1']/arr[@name='tv_mv_text']/str[.='a long days night this should be a piece of text which']",

191

"//arr[@name='tv_mv_text']/str[.=' long fragments.']"

192

);

193

}

194

195

// Variant of testTermVecMultiValuedHighlight to make sure that

196

// more than just the first value of a multi-valued field is

197

// considered for highlighting.

198

@Test

199

public void testTermVecMultiValuedHighlight2() throws Exception {

200

201

// do summarization using term vectors on multivalued field

202

HashMap<String,String> args = new HashMap<String,String>();

203

args.put("hl", "true");

204

args.put("hl.fl", "tv_mv_text");

205

args.put("hl.snippets", "2");

206

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

207

"standard",0,200,args);

208

209

String shortText = "short";

210

assertU(adoc("tv_mv_text", shortText,

211

"tv_mv_text", LONG_TEXT,

212

"id", "1"));

213

assertU(commit());

214

assertU(optimize());

215

assertQ("Basic summarization",

216

sumLRF.makeRequest("tv_mv_text:long"),

217

"//lst[@name='highlighting']/lst[@name='1']",

218

"//lst[@name='1']/arr[@name='tv_mv_text']/str[.='a long days night this should be a piece of text which']",

219

"//arr[@name='tv_mv_text']/str[.=' long fragments.']"

220

);

221

}

222

223

@Test

224

public void testDisMaxHighlight() {

225

226

// same test run through dismax handler

227

HashMap<String,String> args = new HashMap<String,String>();

228

args.put("hl", "true");

229

args.put("hl.fl", "tv_text");

230

args.put("qf", "tv_text");

231

args.put("q.alt", "*:*");

232

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

233

"dismax",0,200,args);

234

235

assertU(adoc("tv_text", "a long day's night", "id", "1"));

236

assertU(commit());

237

assertU(optimize());

238

assertQ("Basic summarization",

239

sumLRF.makeRequest("long"),

240

"//lst[@name='highlighting']/lst[@name='1']",

241

"//lst[@name='1']/arr[@name='tv_text']/str"

242

);

243

244

// try the same thing without a q param

245

assertQ("Should not explode...", // q.alt should return everything

246

sumLRF.makeRequest( new String[] { null } ), // empty query

247

"//result[@numFound='1']"

248

);

249

}

250

251

@Test

252

public void testMultiValueAnalysisHighlight() {

253

254

// do summarization using re-analysis of the field

255

HashMap<String,String> args = new HashMap<String,String>();

256

args.put("hl", "true");

257

args.put("hl.fl", "textgap");

258

args.put("df", "textgap");

259

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

260

"standard", 0, 200, args);

261

262

assertU(adoc("textgap", "first entry hasnt queryword",

263

"textgap", "second entry has queryword long",

264

"id", "1"));

265

assertU(commit());

266

assertU(optimize());

267

assertQ("Basic summarization",

268

sumLRF.makeRequest("long"),

269

"//lst[@name='highlighting']/lst[@name='1']",

270

"//lst[@name='1']/arr[@name='textgap']/str"

271

);

272

273

}

274

275

@Test

276

public void testMultiValueBestFragmentHighlight() {

277

HashMap<String,String> args = new HashMap<String,String>();

278

args.put("hl", "true");

279

args.put("hl.fl", "textgap");

280

args.put("df", "textgap");

281

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

282

"standard", 0, 200, args);

283

284

assertU(adoc("textgap", "first entry has one word foo",

285

"textgap", "second entry has both words foo bar",

286

"id", "1"));

287

assertU(commit());

288

assertU(optimize());

289

assertQ("Best fragment summarization",

290

sumLRF.makeRequest("foo bar"),

291

"//lst[@name='highlighting']/lst[@name='1']",

292

"//lst[@name='1']/arr[@name='textgap']/str[.=\'second entry has both words foo bar\']"

293

);

294

}

295

296

@Test

297

public void testDefaultFieldHighlight() {

298

299

// do summarization using re-analysis of the field

300

HashMap<String,String> args = new HashMap<String,String>();

301

args.put("hl", "true");

302

args.put("df", "t_text");

303

args.put("hl.fl", "");

304

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

305

"standard", 0, 200, args);

306

307

assertU(adoc("t_text", "a long day's night", "id", "1"));

308

assertU(commit());

309

assertU(optimize());

310

assertQ("Basic summarization",

311

sumLRF.makeRequest("long"),

312

"//lst[@name='highlighting']/lst[@name='1']",

313

"//lst[@name='1']/arr[@name='t_text']/str"

314

);

315

316

}

317

318

319

@Test

320

public void testHighlightDisabled() {

321

322

// ensure highlighting can be explicitly disabled

323

HashMap<String,String> args = new HashMap<String,String>();

324

args.put("hl", "false");

325

args.put("hl.fl", "t_text");

326

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

327

"standard", 0, 200, args);

328

329

assertU(adoc("t_text", "a long day's night", "id", "1"));

330

assertU(commit());

331

assertU(optimize());

332

assertQ("Basic summarization",

333

sumLRF.makeRequest("t_text:long"), "not(//lst[@name='highlighting'])");

334

335

}

336

337

@Test

338

public void testTwoFieldHighlight() {

339

340

// do summarization using re-analysis of the field

341

HashMap<String,String> args = new HashMap<String,String>();

342

args.put("hl", "true");

343

args.put("hl.fl", "t_text tv_text");

344

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

345

"standard", 0, 200, args);

346

347

assertU(adoc("t_text", "a long day's night", "id", "1",

348

"tv_text", "a long night's day"));

349

assertU(commit());

350

assertU(optimize());

351

assertQ("Basic summarization",

352

sumLRF.makeRequest("t_text:long"),

353

"//lst[@name='highlighting']/lst[@name='1']",

354

"//lst[@name='1']/arr[@name='t_text']/str",

355

"//lst[@name='1']/arr[@name='tv_text']/str"

356

);

357

}

358

359

@Test

360

public void testFieldMatch()

361

{

362

assertU(adoc("t_text1", "random words for highlighting tests", "id", "1",

363

"t_text2", "more random words for second field"));

364

assertU(commit());

365

assertU(optimize());

366

367

HashMap<String,String> args = new HashMap<String,String>();

368

args.put("hl", "true");

369

args.put("hl.fl", "t_text1 t_text2");

370

371

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

372

"standard", 0, 200, args);

373

// default should highlight both random and words in both fields

374

assertQ("Test Default",

375

sumLRF.makeRequest("t_text1:random OR t_text2:words"),

376

"//lst[@name='highlighting']/lst[@name='1']",

377

"//lst[@name='1']/arr[@name='t_text1']/str[.='random words for highlighting tests']",

378

"//lst[@name='1']/arr[@name='t_text2']/str[.='more random words for second field']"

379

);

380

381

// requireFieldMatch=true - highlighting should only occur if term matched in that field

382

args.put("hl.requireFieldMatch", "true");

383

sumLRF = h.getRequestFactory(

384

"standard", 0, 200, args);

385

assertQ("Test RequireFieldMatch",

386

sumLRF.makeRequest("t_text1:random OR t_text2:words"),

387

"//lst[@name='highlighting']/lst[@name='1']",

388

"//lst[@name='1']/arr[@name='t_text1']/str[.='random words for highlighting tests']",

389

"//lst[@name='1']/arr[@name='t_text2']/str[.='more random words for second field']"

390

);

391

392

// test case for un-optimized index

393

assertU(adoc("t_text1", "random words for highlighting tests", "id", "2",

394

"t_text2", "more random words for second field"));

395

assertU(delI("1"));

396

assertU(commit());

397

sumLRF = h.getRequestFactory(

398

"standard", 0, 200, args);

399

assertQ("Test RequireFieldMatch on un-optimized index",

400

sumLRF.makeRequest("t_text1:random OR t_text2:words"),

401

"//lst[@name='highlighting']/lst[@name='2']",

402

"//lst[@name='2']/arr[@name='t_text1']/str[.='random words for highlighting tests']",

403

"//lst[@name='2']/arr[@name='t_text2']/str[.='more random words for second field']"

404

);

405

}

406

407

@Test

408

public void testCustomSimpleFormatterHighlight() {

409

410

// do summarization using a custom formatter

411

HashMap<String,String> args = new HashMap<String,String>();

412

args.put("hl", "true");

413

args.put("hl.fl", "t_text");

414

args.put("hl.simple.pre","");

415

args.put("hl.simple.post","");

416

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

417

"standard", 0, 200, args);

418

419

assertU(adoc("t_text", "a long days night", "id", "1"));

420

assertU(commit());

421

assertU(optimize());

422

assertQ("Basic summarization",

423

sumLRF.makeRequest("t_text:long"),

424

"//lst[@name='highlighting']/lst[@name='1']",

425

"//lst[@name='1']/arr[@name='t_text']/str[.='a long days night']"

426

);

427

428

// test a per-field override

429

args.put("f.t_text.hl.simple.pre","");

430

args.put("f.t_text.hl.simple.post","");

431

sumLRF = h.getRequestFactory(

432

"standard", 0, 200, args);

433

assertQ("Basic summarization",

434

sumLRF.makeRequest("t_text:long"),

435

"//lst[@name='highlighting']/lst[@name='1']",

436

"//lst[@name='1']/arr[@name='t_text']/str[.='a long days night']"

437

);

438

439

}

440

441

@Test

442

public void testLongFragment() {

443

444

HashMap<String,String> args = new HashMap<String,String>();

445

args.put("hl", "true");

446

args.put("hl.fl", "tv_text");

447

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

448

"standard", 0, 200, args);

449

450

451

String text =

452

"junit: [mkdir] Created dir: /home/klaas/worio/backend/trunk/build-src/solr-nightly/build/test-results [junit] Running org.apache.solr.BasicFunctionalityTest [junit] Tests run: 7, Failures: 0, Errors: 0, Time elapsed: 5.36 sec [junit] Running org.apache.solr.ConvertedLegacyTest [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 8.268 sec [junit] Running org.apache.solr.DisMaxRequestHandlerTest [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 1.56 sec [junit] Running org.apache.solr.HighlighterTest [junit] Tests run: 7, Failures: 0, Errors: 0, Time elapsed: 4.979 sec [junit] Running org.apache.solr.OutputWriterTest [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.797 sec [junit] Running org.apache.solr.SampleTest [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 1.021 sec [junit] Running org.apache.solr.analysis.TestBufferedTokenStream [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.05 sec [junit] Running org.apache.solr.analysis.TestRemoveDuplicatesTokenFilter [junit] Tests run: 3, Failures: 0, Errors: 0, Time elapsed: 0.054 sec [junit] Running org.apache.solr.analysis.TestSynonymFilter [junit] Tests run: 6, Failures: 0, Errors: 0, Time elapsed: 0.081 sec [junit] Running org.apache.solr.analysis.TestWordDelimiterFilter [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 1.714 sec [junit] Running org.apache.solr.search.TestDocSet [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 0.788 sec [junit] Running org.apache.solr.util.SolrPluginUtilsTest [junit] Tests run: 5, Failures: 0, Errors: 0, Time elapsed: 3.519 sec [junit] Running org.apache.solr.util.TestOpenBitSet [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.533 sec";

453

assertU(adoc("tv_text", text, "id", "1"));

454

assertU(commit());

455

assertU(optimize());

456

assertQ("Basic summarization",

457

sumLRF.makeRequest("tv_text:dir"),

458

"//lst[@name='highlighting']/lst[@name='1']",

459

"//lst[@name='1']/arr[@name='tv_text']/str"

460

);

461

}

462

463

@Test

464

public void testMaxChars() {

465

HashMap<String,String> args = new HashMap<String,String>();

466

args.put("fl", "id score");

467

args.put("hl", "true");

468

args.put("hl.snippets", "10");

469

args.put("hl.fl", "t_text");

470

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

471

"standard", 0, 200, args);

472

473

474

assertU(adoc("t_text", LONG_TEXT, "id", "1"));

475

assertU(commit());

476

assertU(optimize());

477

assertQ("token at start of text",

478

sumLRF.makeRequest("t_text:disjoint"),

479

"//lst[@name='highlighting']/lst[@name='1']",

480

"//lst[@name='1']/arr[count(str)=1]"

481

);

482

args.put("hl.maxAnalyzedChars", "20");

483

sumLRF = h.getRequestFactory("standard", 0, 200, args);

484

assertQ("token at end of text",

485

sumLRF.makeRequest("t_text:disjoint"),

486

"//lst[@name='highlighting']/lst[@name='1']",

487

"//lst[@name='1'][not(*)]"

488

);

489

args.put("hl.maxAnalyzedChars", "-1");

490

sumLRF = h.getRequestFactory("standard", 0, 200, args);

491

assertQ("token at start of text",

492

sumLRF.makeRequest("t_text:disjoint"),

493

"//lst[@name='highlighting']/lst[@name='1']",

494

"//lst[@name='1']/arr[count(str)=1]"

495

);

496

}

497

498

@Test

499

public void testRegexFragmenter() {

500

HashMap<String,String> args = new HashMap<String,String>();

501

args.put("fl", "id score");

502

args.put("hl", "true");

503

args.put("hl.snippets", "10");

504

args.put("hl.fl", "t_text");

505

args.put("hl.fragmenter", "regex");

506

args.put("hl.regex.pattern", "[-\\w ,\"']{20,200}");

507

args.put("hl.regex.slop", ".9");

508

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

509

"standard", 0, 200, args);

510

511

String t = "This is an example of a sentence. Another example \"sentence\" with " +

512

"special characters\nand a line-break! Miscellaneous character like ^ are " +

513

"unknowns and end up being bad example s of sentences? I wonder how " +

514

"slashes/other punctuation fare in these examples?";

515

assertU(adoc("t_text", t, "id", "1"));

516

assertU(commit());

517

assertU(optimize());

518

assertQ("regex fragmenter",

519

sumLRF.makeRequest("t_text:example"),

520

"//lst[@name='highlighting']/lst[@name='1']",

521

"//arr/str[.='This is an example of a sentence']",

522

"//arr/str[.='. Another example \"sentence\" with special characters\nand a line-break']",

523

"//arr/str[.=' ^ are unknowns and end up being bad example s of sentences']",

524

"//arr/str[.='/other punctuation fare in these examples?']"

525

);

526

// try with some punctuation included

527

args.put("hl.regex.pattern", "[-\\w ,^/\\n\"']{20,200}");

528

sumLRF = h.getRequestFactory("standard", 0, 200, args);

529

assertQ("regex fragmenter 2",

530

sumLRF.makeRequest("t_text:example"),

531

"//lst[@name='highlighting']/lst[@name='1']",

532

"//arr/str[.='This is an example of a sentence']",

533

"//arr/str[.='. Another example \"sentence\" with special characters\nand a line-break']",

534

"//arr/str[.='! Miscellaneous character like ^ are unknowns and end up being bad example s of sentences']",

535

"//arr/str[.='? I wonder how slashes/other punctuation fare in these examples?']"

536

);

537

}

538

539

@Test

540

public void testVariableFragsize() {

541

assertU(adoc("tv_text", "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all",

542

"id", "1"));

543

assertU(commit());

544

assertU(optimize());

545

546

// default length

547

HashMap<String,String> args = new HashMap<String,String>();

548

args.put("hl", "true");

549

args.put("hl.fl", "tv_text");

550

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

551

"standard", 0, 200, args);

552

assertQ("Basic summarization",

553

sumLRF.makeRequest("tv_text:long"),

554

"//lst[@name='highlighting']/lst[@name='1']",

555

"//lst[@name='1']/arr[@name='tv_text']/str[.='a long days night this should be a piece of text which']"

556

);

557

558

// 25

559

args.put("hl.fragsize","25");

560

sumLRF = h.getRequestFactory(

561

"standard", 0, 200, args);

562

assertQ("Basic summarization",

563

sumLRF.makeRequest("tv_text:long"),

564

"//lst[@name='highlighting']/lst[@name='1']",

565

"//lst[@name='1']/arr[@name='tv_text']/str[.='a long days night']"

566

);

567

568

// 0 - NullFragmenter

569

args.put("hl.fragsize","0");

570

sumLRF = h.getRequestFactory(

571

"standard", 0, 200, args);

572

assertQ("Basic summarization",

573

sumLRF.makeRequest("tv_text:long"),

574

"//lst[@name='highlighting']/lst[@name='1']",

575

"//lst[@name='1']/arr[@name='tv_text']/str[.='a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all']"

576

);

577

}

578

579

@Test

580

public void testAlternateSummary() {

581

//long document

582

assertU(adoc("tv_text", "keyword is only here",

583

"t_text", "a piece of text to be substituted",

584

"id", "1",

585

"foo_t","hi"));

586

assertU(commit());

587

assertU(optimize());

588

589

// do summarization

590

HashMap<String,String> args = new HashMap<String,String>();

591

args.put("hl", "true");

592

args.put("hl.fragsize","0");

593

args.put("hl.fl", "t_text");

594

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

595

"standard", 0, 200, args);

596

597

// no alternate

598

assertQ("Alternate summarization",

599

sumLRF.makeRequest("tv_text:keyword"),

600

"//lst[@name='highlighting']/lst[@name='1']",

601

"//lst[@name='highlighting']/lst[@name='1' and count(*)=0]"

602

);

603

604

// with an alternate

605

args.put("hl.alternateField", "foo_t");

606

sumLRF = h.getRequestFactory("standard", 0, 200, args);

607

assertQ("Alternate summarization",

608

sumLRF.makeRequest("tv_text:keyword"),

609

"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",

610

"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='hi']"

611

);

612

613

// with an alternate + max length

614

args.put("hl.alternateField", "t_text");

615

args.put("hl.maxAlternateFieldLength", "15");

616

sumLRF = h.getRequestFactory("standard", 0, 200, args);

617

assertQ("Alternate summarization",

618

sumLRF.makeRequest("tv_text:keyword"),

619

"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",

620

"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='a piece of text']"

621

);

622

}

623

624

@Test

625

public void testPhraseHighlighter() {

626

HashMap<String,String> args = new HashMap<String,String>();

627

args.put("hl", "true");

628

args.put("hl.fl", "t_text");

629

args.put("hl.fragsize", "40");

630

args.put("hl.snippets", "10");

631

args.put("hl.usePhraseHighlighter", "false");

632

633

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

634

"standard", 0, 200, args);

635

636

// String borrowed from Lucene's HighlighterTest

637

String t = "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy";

638

639

assertU(adoc("t_text", t, "id", "1"));

640

assertU(commit());

641

assertU(optimize());

642

643

String oldHighlight1 = "//lst[@name='1']/arr[@name='t_text']/str[.='This piece of text refers to Kennedy']";

644

String oldHighlight2 = "//lst[@name='1']/arr[@name='t_text']/str[.=' at the beginning then has a longer piece of text']";

645

String oldHighlight3 = "//lst[@name='1']/arr[@name='t_text']/str[.=' with another reference to Kennedy']";

646

String newHighlight1 = "//lst[@name='1']/arr[@name='t_text']/str[.='This piece of text refers to Kennedy']";

647

648

// check if old functionality is still the same

649

assertQ("Phrase highlighting - old",

650

sumLRF.makeRequest("t_text:\"text refers\""),

651

"//lst[@name='highlighting']/lst[@name='1']",

652

oldHighlight1, oldHighlight2, oldHighlight3

653

);

654

655

assertQ("Phrase highlighting - old",

656

sumLRF.makeRequest("t_text:text refers"),

657

"//lst[@name='highlighting']/lst[@name='1']",

658

oldHighlight1, oldHighlight2, oldHighlight3

659

);

660

661

// now check if Lucene-794 highlighting works as expected

662

args.put("hl.usePhraseHighlighter", "true");

663

664

sumLRF = h.getRequestFactory("standard", 0, 200, args);

665

666

// check phrase highlighting

667

assertQ("Phrase highlighting - Lucene-794",

668

sumLRF.makeRequest("t_text:\"text refers\""),

669

"//lst[@name='highlighting']/lst[@name='1']",

670

newHighlight1

671

);

672

673

// non phrase queries should be highlighted as they were before this fix

674

assertQ("Phrase highlighting - Lucene-794",

675

sumLRF.makeRequest("t_text:text refers"),

676

"//lst[@name='highlighting']/lst[@name='1']",

677

oldHighlight1, oldHighlight2, oldHighlight3

678

);

679

}

680

681

@Test

682

public void testGetHighlightFields() {

683

HashMap<String, String> args = new HashMap<String, String>();

684

args.put("fl", "id score");

685

args.put("hl", "true");

686

args.put("hl.fl", "t*");

687

688

assertU(adoc("id", "0", "title", "test", // static stored

689

"text", "test", // static not stored

690

"foo_s", "test", // dynamic stored

691

"foo_sI", "test", // dynamic not stored

692

"weight", "1.0")); // stored but not text

693

assertU(commit());

694

assertU(optimize());

695

696

TestHarness.LocalRequestFactory lrf = h.getRequestFactory("standard", 0,

697

10, args);

698

699

SolrQueryRequest request = lrf.makeRequest("test");

700

SolrHighlighter highlighter = request.getCore().getHighlighter();

701

List<String> highlightFieldNames = Arrays.asList(highlighter

702

.getHighlightFields(null, request, new String[] {}));

703

assertTrue("Expected to highlight on field \"title\"", highlightFieldNames

704

.contains("title"));

705

assertFalse("Expected to not highlight on field \"text\"",

706

highlightFieldNames.contains("text"));

707

assertFalse("Expected to not highlight on field \"weight\"",

708

highlightFieldNames.contains("weight"));

709

request.close();

710

711

args.put("hl.fl", "foo_*");

712

lrf = h.getRequestFactory("standard", 0, 10, args);

713

request = lrf.makeRequest("test");

714

highlighter = request.getCore().getHighlighter();

715

highlightFieldNames = Arrays.asList(highlighter.getHighlightFields(null,

716

request, new String[] {}));

717

assertEquals("Expected one field to highlight on", 1, highlightFieldNames

718

.size());

719

assertEquals("Expected to highlight on field \"foo_s\"", "foo_s",

720

highlightFieldNames.get(0));

721

request.close();

722

}

723

724

@Test

725

public void testDefaultFieldPrefixWildcardHighlight() {

726

727

// do summarization using re-analysis of the field

728

HashMap<String,String> args = new HashMap<String,String>();

729

args.put("hl", "true");

730

args.put("df", "t_text");

731

args.put("hl.fl", "");

732

args.put("hl.usePhraseHighlighter", "true");

733

args.put("hl.highlightMultiTerm", "true");

734

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

735

"standard", 0, 200, args);

736

737

assertU(adoc("t_text", "a long day's night", "id", "1"));

738

assertU(commit());

739

assertU(optimize());

740

assertQ("Basic summarization",

741

sumLRF.makeRequest("lon*"),

742

"//lst[@name='highlighting']/lst[@name='1']",

743

"//lst[@name='1']/arr[@name='t_text']/str"

744

);

745

746

}

747

748

@Test

749

public void testDefaultFieldNonPrefixWildcardHighlight() {

750

751

// do summarization using re-analysis of the field

752

HashMap<String,String> args = new HashMap<String,String>();

753

args.put("hl", "true");

754

args.put("df", "t_text");

755

args.put("hl.fl", "");

756

args.put("hl.usePhraseHighlighter", "true");

757

args.put("hl.highlightMultiTerm", "true");

758

TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(

759

"standard", 0, 200, args);

760

761

assertU(adoc("t_text", "a long day's night", "id", "1"));

762

assertU(commit());

763

assertU(optimize());

764

assertQ("Basic summarization",

765

sumLRF.makeRequest("l*g"),

766

"//lst[@name='highlighting']/lst[@name='1']",

767

"//lst[@name='1']/arr[@name='t_text']/str"

768

);

769

770

}

771

772

public void testSubwordWildcardHighlight() {

773

assertU(adoc("subword", "lorem PowerShot.com ipsum", "id", "1"));

774

assertU(commit());

775

assertQ("subword wildcard highlighting",

776

req("q", "subword:pow*", "hl", "true", "hl.fl", "subword"),

777

"//lst[@name='highlighting']/lst[@name='1']" +

778

"/arr[@name='subword']/str='lorem PowerShot.com ipsum'");

779

}

780

781

public void testSubwordWildcardHighlightWithTermOffsets() {

782

assertU(adoc("subword_offsets", "lorem PowerShot.com ipsum", "id", "1"));

783

assertU(commit());

784

assertQ("subword wildcard highlighting",

785

req("q", "subword_offsets:pow*", "hl", "true", "hl.fl", "subword_offsets"),

786

"//lst[@name='highlighting']/lst[@name='1']" +

787

"/arr[@name='subword_offsets']/str='lorem PowerShot.com ipsum'");

788

}

789

790

public void testSubwordWildcardHighlightWithTermOffsets2() {

791

assertU(adoc("subword_offsets", "lorem PowerShot ipsum", "id", "1"));

792

assertU(commit());

793

assertQ("subword wildcard highlighting",

794

req("q", "subword_offsets:pow*", "hl", "true", "hl.fl", "subword_offsets"),

795

"//lst[@name='highlighting']/lst[@name='1']" +

796

"/arr[@name='subword_offsets']/str='lorem PowerShot ipsum'");

797

}

798

799

public void testHlQParameter() {

800

assertU(adoc("title", "Apache Software Foundation", "id", "1"));

801

assertU(commit());

802

assertQ("hl.q parameter overrides q parameter",

803

req("q", "title:Apache", "hl", "true", "hl.fl", "title", "hl.q", "title:Software"),

804

"//lst[@name='highlighting']/lst[@name='1']" +

805

"/arr[@name='title']/str='Apache Software Foundation'");

806

assertQ("hl.q parameter overrides q parameter",

807

req("q", "title:Apache", "hl", "true", "hl.fl", "title", "hl.q", "{!v=$qq}", "qq", "title:Foundation"),

808

"//lst[@name='highlighting']/lst[@name='1']" +

809

"/arr[@name='title']/str='Apache Software Foundation'");

810

}

811

}

// Older » (pagination link left over from the web page this listing was copied from)