~slub.team/goobi-indexserver/3.x

final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);

111

indexSearcher.search(new TermQuery(new Term("content", "random")), c2);

112

113

final TopGroups groups = c2.getTopGroups(0);

114

115

assertEquals(7, groups.totalHitCount);

116

assertEquals(7, groups.totalGroupedHitCount);

117

assertEquals(4, groups.groups.length);

118

119

// relevance order: 5, 0, 3, 4, 1, 2, 6

120

121

// the later a document is added the higher this docId

122

// value

123

GroupDocs group = groups.groups[0];

124

assertEquals("author3", group.groupValue);

125

assertEquals(2, group.scoreDocs.length);

126

assertEquals(5, group.scoreDocs[0].doc);

127

assertEquals(4, group.scoreDocs[1].doc);

128

assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score);

129

130

group = groups.groups[1];

131

assertEquals("author1", group.groupValue);

132

assertEquals(3, group.scoreDocs.length);

133

assertEquals(0, group.scoreDocs[0].doc);

134

assertEquals(1, group.scoreDocs[1].doc);

135

assertEquals(2, group.scoreDocs[2].doc);

136

assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score);

137

assertTrue(group.scoreDocs[1].score > group.scoreDocs[2].score);

138

139

group = groups.groups[2];

140

assertEquals("author2", group.groupValue);

141

assertEquals(1, group.scoreDocs.length);

142

assertEquals(3, group.scoreDocs[0].doc);

143

144

group = groups.groups[3];

145

assertNull(group.groupValue);

146

assertEquals(1, group.scoreDocs.length);

147

assertEquals(6, group.scoreDocs[0].doc);

148

149

indexSearcher.getIndexReader().close();

150

dir.close();

151

}

152

153

private static class GroupDoc {

154

final int id;

155

final String group;

156

final String sort1;

157

final String sort2;

158

// content must be "realN ..."

159

final String content;

160

float score;

161

float score2;

162

163

public GroupDoc(int id, String group, String sort1, String sort2, String content) {

164

this.id = id;

165

this.group = group;

166

this.sort1 = sort1;

167

this.sort2 = sort2;

168

this.content = content;

169

}

170

}

171

172

private Sort getRandomSort() {

173

final List<SortField> sortFields = new ArrayList<SortField>();

174

if (random.nextInt(7) == 2) {

175

sortFields.add(SortField.FIELD_SCORE);

176

} else {

177

if (random.nextBoolean()) {

178

if (random.nextBoolean()) {

179

sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean()));

180

} else {

181

sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean()));

182

}

183

} else if (random.nextBoolean()) {

184

sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean()));

185

sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean()));

186

}

187

}

188

// Break ties:

189

sortFields.add(new SortField("id", SortField.INT));

190

return new Sort(sortFields.toArray(new SortField[sortFields.size()]));

191

}

192

193

private Comparator<GroupDoc> getComparator(Sort sort) {

194

final SortField[] sortFields = sort.getSort();

195

return new Comparator<GroupDoc>() {

196

// @Override -- Not until Java 1.6

197

public int compare(GroupDoc d1, GroupDoc d2) {

198

for(SortField sf : sortFields) {

199

final int cmp;

200

if (sf.getType() == SortField.SCORE) {

201

if (d1.score > d2.score) {

202

cmp = -1;

203

} else if (d1.score < d2.score) {

204

cmp = 1;

205

} else {

206

cmp = 0;

207

}

208

} else if (sf.getField().equals("sort1")) {

209

cmp = d1.sort1.compareTo(d2.sort1);

210

} else if (sf.getField().equals("sort2")) {

211

cmp = d1.sort2.compareTo(d2.sort2);

212

} else {

213

assertEquals(sf.getField(), "id");

214

cmp = d1.id - d2.id;

215

}

216

if (cmp != 0) {

217

return sf.getReverse() ? -cmp : cmp;

218

}

219

}

220

// Our sort always fully tie breaks:

221

fail();

222

return 0;

223

}

224

};

225

}

226

227

private Comparable<?>[] fillFields(GroupDoc d, Sort sort) {

228

final SortField[] sortFields = sort.getSort();

229

final Comparable<?>[] fields = new Comparable[sortFields.length];

230

for(int fieldIDX=0;fieldIDX<sortFields.length;fieldIDX++) {

231

final Comparable<?> c;

232

final SortField sf = sortFields[fieldIDX];

233

if (sf.getType() == SortField.SCORE) {

234

c = new Float(d.score);

235

} else if (sf.getField().equals("sort1")) {

236

c = d.sort1;

237

} else if (sf.getField().equals("sort2")) {

238

c = d.sort2;

239

} else {

240

assertEquals("id", sf.getField());

241

c = new Integer(d.id);

242

}

243

fields[fieldIDX] = c;

244

}

245

return fields;

246

}

247

248

private String groupToString(String b) {

249

if (b == null) {

250

return "null";

251

} else {

252

return b;

253

}

254

}

255

256

private TopGroups<String> slowGrouping(GroupDoc[] groupDocs,

257

String searchTerm,

258

boolean fillFields,

259

boolean getScores,

260

boolean getMaxScores,

261

boolean doAllGroups,

262

Sort groupSort,

263

Sort docSort,

264

int topNGroups,

265

int docsPerGroup,

266

int groupOffset,

267

int docOffset) {

268

269

final Comparator<GroupDoc> groupSortComp = getComparator(groupSort);

270

271

Arrays.sort(groupDocs, groupSortComp);

272

final HashMap<String,List<GroupDoc>> groups = new HashMap<String,List<GroupDoc>>();

273

final List<String> sortedGroups = new ArrayList<String>();

274

final List<Comparable<?>[]> sortedGroupFields = new ArrayList<Comparable<?>[]>();

275

276

int totalHitCount = 0;

277

Set<String> knownGroups = new HashSet<String>();

278

279

//System.out.println("TEST: slowGrouping");

280

for(GroupDoc d : groupDocs) {

281

// TODO: would be better to filter by searchTerm before sorting!

282

if (!d.content.startsWith(searchTerm)) {

283

continue;

284

}

285

totalHitCount++;

286

287

//System.out.println(" match id=" + d.id + " score=" + d.score);

288

289

if (doAllGroups) {

290

if (!knownGroups.contains(d.group)) {

291

knownGroups.add(d.group);

292

//System.out.println(" add group=" + groupToString(d.group));

293

}

294

}

295

296

List<GroupDoc> l = groups.get(d.group);

297

if (l == null) {

298

//System.out.println(" add sortedGroup=" + groupToString(d.group));

299

sortedGroups.add(d.group);

300

if (fillFields) {

301

sortedGroupFields.add(fillFields(d, groupSort));

302

}

303

l = new ArrayList<GroupDoc>();

304

groups.put(d.group, l);

305

}

306

l.add(d);

307

}

308

309

if (groupOffset >= sortedGroups.size()) {

310

// slice is out of bounds

311

return null;

312

}

313

314

final int limit = Math.min(groupOffset + topNGroups, groups.size());

315

316

final Comparator<GroupDoc> docSortComp = getComparator(docSort);

317

@SuppressWarnings("unchecked")

318

final GroupDocs<String>[] result = new GroupDocs[limit-groupOffset];

319

int totalGroupedHitCount = 0;

320

for(int idx=groupOffset;idx < limit;idx++) {

321

final String group = sortedGroups.get(idx);

322

final List<GroupDoc> docs = groups.get(group);

323

totalGroupedHitCount += docs.size();

324

Collections.sort(docs, docSortComp);

325

final ScoreDoc[] hits;

326

if (docs.size() > docOffset) {

327

final int docIDXLimit = Math.min(docOffset + docsPerGroup, docs.size());

328

hits = new ScoreDoc[docIDXLimit - docOffset];

329

for(int docIDX=docOffset; docIDX < docIDXLimit; docIDX++) {

330

final GroupDoc d = docs.get(docIDX);

331

final FieldDoc fd;

332

if (fillFields) {

333

fd = new FieldDoc(d.id, getScores ? d.score : Float.NaN, fillFields(d, docSort));

334

} else {

335

fd = new FieldDoc(d.id, getScores ? d.score : Float.NaN);

336

}

337

hits[docIDX-docOffset] = fd;

338

}

339

} else {

340

hits = new ScoreDoc[0];

341

}

342

343

result[idx-groupOffset] = new GroupDocs<String>(0.0f,

344

docs.size(),

345

hits,

346

group,

347

fillFields ? sortedGroupFields.get(idx) : null);

348

}

349

350

if (doAllGroups) {

351

return new TopGroups<String>(

352

new TopGroups<String>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result),

353

knownGroups.size()

354

);

355

} else {

356

return new TopGroups<String>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);

357

}

358

}

359

360

private IndexReader getDocBlockReader(Directory dir, GroupDoc[] groupDocs) throws IOException {

361

// Coalesce by group, but in random order:

362

Collections.shuffle(Arrays.asList(groupDocs), random);

363

final Map<String,List<GroupDoc>> groupMap = new HashMap<String,List<GroupDoc>>();

364

final List<String> groupValues = new ArrayList<String>();

365

366

for(GroupDoc groupDoc : groupDocs) {

367

if (!groupMap.containsKey(groupDoc.group)) {

368

groupValues.add(groupDoc.group);

369

groupMap.put(groupDoc.group, new ArrayList<GroupDoc>());

370

}

371

groupMap.get(groupDoc.group).add(groupDoc);

372

}

373

374

RandomIndexWriter w = new RandomIndexWriter(

375

random,

376

dir,

377

newIndexWriterConfig(TEST_VERSION_CURRENT,

378

new MockAnalyzer(random)));

379

380

final List<List<Document>> updateDocs = new ArrayList<List<Document>>();

381

382

//System.out.println("TEST: index groups");

383

for(String group : groupValues) {

384

final List<Document> docs = new ArrayList<Document>();

385

//System.out.println("TEST: group=" + (group == null ? "null" : group.utf8ToString()));

386

for(GroupDoc groupValue : groupMap.get(group)) {

387

Document doc = new Document();

388

docs.add(doc);

389

if (groupValue.group != null) {

390

doc.add(newField("group", groupValue.group, Field.Index.NOT_ANALYZED));

391

}

392

doc.add(newField("sort1", groupValue.sort1, Field.Index.NOT_ANALYZED));

393

doc.add(newField("sort2", groupValue.sort2, Field.Index.NOT_ANALYZED));

394

doc.add(new NumericField("id").setIntValue(groupValue.id));

395

doc.add(newField("content", groupValue.content, Field.Index.ANALYZED));

396

//System.out.println("TEST: doc content=" + groupValue.content + " group=" + (groupValue.group == null ? "null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id);

397

}

398

// So we can pull filter marking last doc in block:

399

final Field groupEnd = newField("groupend", "x", Field.Index.NOT_ANALYZED);

400

groupEnd.setIndexOptions(IndexOptions.DOCS_ONLY);

401

groupEnd.setOmitNorms(true);

402

docs.get(docs.size()-1).add(groupEnd);

403

// Add as a doc block:

404

w.addDocuments(docs);

405

if (group != null && random.nextInt(7) == 4) {

406

updateDocs.add(docs);

407

}

408

}

409

410

for(List<Document> docs : updateDocs) {

411

// Just replaces docs w/ same docs:

412

w.updateDocuments(new Term("group", docs.get(0).get("group")),

413

docs);

414

}

415

416

final IndexReader r = w.getReader();

417

w.close();

418

419

return r;

420

}

421

422

private static class ShardState {

423

424

public final ShardSearcher[] subSearchers;

425

public final int[] docStarts;

426

427

public ShardState(IndexSearcher s) {

428

IndexReader[] subReaders = s.getIndexReader().getSequentialSubReaders();

429

if (subReaders == null) {

430

subReaders = new IndexReader[] {s.getIndexReader()};

431

}

432

subSearchers = new ShardSearcher[subReaders.length];

433

for(int searcherIDX=0;searcherIDX<subSearchers.length;searcherIDX++) {

434

subSearchers[searcherIDX] = new ShardSearcher(subReaders[searcherIDX]);

435

}

436

437

docStarts = new int[subSearchers.length];

438

int docBase = 0;

439

for(int subIDX=0;subIDX<docStarts.length;subIDX++) {

440

docStarts[subIDX] = docBase;

441

docBase += subReaders[subIDX].maxDoc();

442

//System.out.println("docStarts[" + subIDX + "]=" + docStarts[subIDX]);

443

}

444

}

445

}

446

447

// Randomized end-to-end grouping test. Each of the 3 iterations indexes a
// random set of documents (random group, sort1, sort2 and "realN fake ..."
// content), then runs 100 random searches. For every search the expected
// result is computed by slowGrouping() and compared against: the two-pass
// Term*GroupingCollector result, the merged per-shard result from
// searchShards(), and the BlockGroupingCollector result (plus its sharded
// counterpart) on a second index built by getDocBlockReader().
public void testRandom() throws Exception {

448

for(int iter=0;iter<3;iter++) {

449

450

if (VERBOSE) {

451

System.out.println("TEST: iter=" + iter);

452

}

453

454

final int numDocs = _TestUtil.nextInt(random, 100, 1000) * RANDOM_MULTIPLIER;

455

//final int numDocs = _TestUtil.nextInt(random, 5, 20);

456

457

final int numGroups = _TestUtil.nextInt(random, 1, numDocs);

458

459

if (VERBOSE) {

460

System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);

461

}

462

463

final List<String> groups = new ArrayList<String>();

464

for(int i=0;i<numGroups;i++) {

465

groups.add(_TestUtil.randomRealisticUnicodeString(random));

466

//groups.add(_TestUtil.randomUnicodeString(random));

467

assertEquals(-1, groups.get(groups.size()-1).indexOf(0xffff));

468

//groups.add(new BytesRef(_TestUtil.randomSimpleString(random)));

469

}

470

final String[] contentStrings = new String[_TestUtil.nextInt(random, 2, 20)];

471

if (VERBOSE) {

472

System.out.println("TEST: create fake content");

473

}

474

// Each content string is "realN " (N in 0..2) followed by 0-9 copies of "fake ":
for(int contentIDX=0;contentIDX<contentStrings.length;contentIDX++) {

475

final StringBuilder sb = new StringBuilder();

476

sb.append("real" + random.nextInt(3)).append(' ');

477

final int fakeCount = random.nextInt(10);

478

for(int fakeIDX=0;fakeIDX<fakeCount;fakeIDX++) {

479

sb.append("fake ");

480

}

481

contentStrings[contentIDX] = sb.toString();

482

if (VERBOSE) {

483

System.out.println(" content=" + sb.toString());

484

}

485

}

486

487

Directory dir = newDirectory();

488

RandomIndexWriter w = new RandomIndexWriter(

489

random,

490

dir,

491

newIndexWriterConfig(TEST_VERSION_CURRENT,

492

new MockAnalyzer(random)));

493

494

// Two reusable documents sharing the same Field instances; docNoGroup is
// the same as doc except that it never gets the "group" field:
Document doc = new Document();

495

Document docNoGroup = new Document();

496

Field group = newField("group", "", Field.Index.NOT_ANALYZED);

497

doc.add(group);

498

Field sort1 = newField("sort1", "", Field.Index.NOT_ANALYZED);

499

doc.add(sort1);

500

docNoGroup.add(sort1);

501

Field sort2 = newField("sort2", "", Field.Index.NOT_ANALYZED);

502

doc.add(sort2);

503

docNoGroup.add(sort2);

504

Field content = newField("content", "", Field.Index.ANALYZED);

505

doc.add(content);

506

docNoGroup.add(content);

507

NumericField id = new NumericField("id");

508

doc.add(id);

509

docNoGroup.add(id);

510

final GroupDoc[] groupDocs = new GroupDoc[numDocs];

511

for(int i=0;i<numDocs;i++) {

512

final String groupValue;

513

if (random.nextInt(24) == 17) {

514

// So we test the "doc doesn't have the group'd

515

// field" case:

516

groupValue = null;

517

} else {

518

groupValue = groups.get(random.nextInt(groups.size()));

519

}

520

final GroupDoc groupDoc = new GroupDoc(i,

521

groupValue,

522

groups.get(random.nextInt(groups.size())),

523

groups.get(random.nextInt(groups.size())),

524

contentStrings[random.nextInt(contentStrings.length)]);

525

if (VERBOSE) {

526

System.out.println(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group) + " sort1=" + groupDoc.sort1 + " sort2=" + groupDoc.sort2);

527

}

528

529

groupDocs[i] = groupDoc;

530

if (groupDoc.group != null) {

531

group.setValue(groupDoc.group);

532

}

533

sort1.setValue(groupDoc.sort1);

534

sort2.setValue(groupDoc.sort2);

535

content.setValue(groupDoc.content);

536

id.setIntValue(groupDoc.id);

537

if (groupDoc.group == null) {

538

w.addDocument(docNoGroup);

539

} else {

540

w.addDocument(doc);

541

}

542

}

543

544

// Copy taken while index i still holds the doc with id i; groupDocs itself
// is later reordered in place by getDocBlockReader() and slowGrouping():
final GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.length];

545

System.arraycopy(groupDocs, 0, groupDocsByID, 0, groupDocs.length);

546

547

final IndexReader r = w.getReader();

548

w.close();

549

550

// NOTE: intentional but temporary field cache insanity!

551

final int[] docIDToID = FieldCache.DEFAULT.getInts(r, "id");

552

IndexReader rBlocks = null;

553

Directory dirBlocks = null;

554

555

try {

556

final IndexSearcher s = newSearcher(r);

557

final ShardState shards = new ShardState(s);

558

559

// Record every doc's score in the first index, one query per "realN" term:
for(int contentID=0;contentID<3;contentID++) {

560

final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;

561

for(ScoreDoc hit : hits) {

562

final GroupDoc gd = groupDocs[docIDToID[hit.doc]];

563

assertTrue(gd.score == 0.0);

564

gd.score = hit.score;

565

assertEquals(gd.id, docIDToID[hit.doc]);

566

//System.out.println(" score=" + hit.score + " id=" + docIDToID[hit.doc]);

567

}

568

}

569

570

// Every doc must have been matched by exactly one of the three queries above:
for(GroupDoc gd : groupDocs) {

571

assertTrue(gd.score != 0.0);

572

}

573

574

// Build 2nd index, where docs are added in blocks by

575

// group, so we can use single pass collector

576

dirBlocks = newDirectory();

577

rBlocks = getDocBlockReader(dirBlocks, groupDocs);

578

final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));

579

final int[] docIDToIDBlocks = FieldCache.DEFAULT.getInts(rBlocks, "id");

580

581

final IndexSearcher sBlocks = newSearcher(rBlocks);

582

final ShardState shardsBlocks = new ShardState(sBlocks);

583

584

// ReaderBlocks only increases maxDoc() vs reader, which

585

// means a monotonic shift in scores, so we can

586

// reliably remap them w/ Map:

587

final Map<String,Map<Float,Float>> scoreMap = new HashMap<String,Map<Float,Float>>();

588

589

// Tricky: must separately set .score2, because the doc

590

// block index was created with possible deletions!

591

//System.out.println("fixup score2");

592

for(int contentID=0;contentID<3;contentID++) {

593

//System.out.println(" term=real" + contentID);

594

final Map<Float,Float> termScoreMap = new HashMap<Float,Float>();

595

scoreMap.put("real"+contentID, termScoreMap);

596

//System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +

597

//" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));

598

final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;

599

for(ScoreDoc hit : hits) {

600

final GroupDoc gd = groupDocsByID[docIDToIDBlocks[hit.doc]];

601

assertTrue(gd.score2 == 0.0);

602

gd.score2 = hit.score;

603

assertEquals(gd.id, docIDToIDBlocks[hit.doc]);

604

//System.out.println(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks[hit.doc]);

605

termScoreMap.put(gd.score, gd.score2);

606

}

607

}

608

609

for(int searchIter=0;searchIter<100;searchIter++) {

610

611

if (VERBOSE) {

612

System.out.println("\nTEST: searchIter=" + searchIter);

613

}

614

615

final String searchTerm = "real" + random.nextInt(3);

616

final boolean fillFields = random.nextBoolean();

617

boolean getScores = random.nextBoolean();

618

final boolean getMaxScores = random.nextBoolean();

619

final Sort groupSort = getRandomSort();

620

//final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});

621

// TODO: also test null (= sort by relevance)

622

final Sort docSort = getRandomSort();

623

624

// getScores is forced on whenever either sort contains a SCORE field:
for(SortField sf : docSort.getSort()) {

625

if (sf.getType() == SortField.SCORE) {

626

getScores = true;

627

}

628

}

629

630

for(SortField sf : groupSort.getSort()) {

631

if (sf.getType() == SortField.SCORE) {

632

getScores = true;

633

}

634

}

635

636

final int topNGroups = _TestUtil.nextInt(random, 1, 30);

637

//final int topNGroups = 10;

638

final int docsPerGroup = _TestUtil.nextInt(random, 1, 50);

639

640

final int groupOffset = _TestUtil.nextInt(random, 0, (topNGroups-1)/2);

641

//final int groupOffset = 0;

642

643

final int docOffset = _TestUtil.nextInt(random, 0, docsPerGroup-1);

644

//final int docOffset = 0;

645

646

final boolean doCache = random.nextBoolean();

647

final boolean doAllGroups = random.nextBoolean();

648

if (VERBOSE) {

649

System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) +" dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);

650

}

651

652

final TermAllGroupsCollector allGroupsCollector;

653

if (doAllGroups) {

654

allGroupsCollector = new TermAllGroupsCollector("group");

655

} else {

656

allGroupsCollector = null;

657

}

658

659

final TermFirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);

660

final CachingCollector cCache;

661

final Collector c;

662

663

final boolean useWrappingCollector = random.nextBoolean();

664

665

if (doCache) {

666

final double maxCacheMB = random.nextDouble();

667

if (VERBOSE) {

668

System.out.println("TEST: maxCacheMB=" + maxCacheMB);

669

}

670

671

if (useWrappingCollector) {

672

if (doAllGroups) {

673

cCache = CachingCollector.create(c1, true, maxCacheMB);

674

c = MultiCollector.wrap(cCache, allGroupsCollector);

675

} else {

676

c = cCache = CachingCollector.create(c1, true, maxCacheMB);

677

}

678

} else {

679

// Collect only into cache, then replay multiple times:

680

c = cCache = CachingCollector.create(false, true, maxCacheMB);

681

}

682

} else {

683

cCache = null;

684

if (doAllGroups) {

685

c = MultiCollector.wrap(c1, allGroupsCollector);

686

} else {

687

c = c1;

688

}

689

}

690

691

// Search top reader:

692

final Query query = new TermQuery(new Term("content", searchTerm));

693

s.search(query, c);

694

695

if (doCache && !useWrappingCollector) {

696

if (cCache.isCached()) {

697

// Replay for first-pass grouping

698

cCache.replay(c1);

699

if (doAllGroups) {

700

// Replay for all groups:

701

cCache.replay(allGroupsCollector);

702

}

703

} else {

704

// Replay by re-running search:

705

s.search(query, c1);

706

if (doAllGroups) {

707

s.search(query, allGroupsCollector);

708

}

709

}

710

}

711

712

// Get 1st pass top groups

713

final Collection<SearchGroup<String>> topGroups = c1.getTopGroups(groupOffset, fillFields);

714

715

if (VERBOSE) {

716

System.out.println("TEST: first pass topGroups");

717

if (topGroups == null) {

718

System.out.println(" null");

719

} else {

720

for(SearchGroup<String> searchGroup : topGroups) {

721

System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));

722

}

723

}

724

}

725

726

// Get 1st pass top groups using shards

727

final TopGroups<String> topGroupsShards = searchShards(s, shards, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);

728

729

final TopGroups<String> groupsResult;

730

if (topGroups != null) {

731

732

if (VERBOSE) {

733

System.out.println("TEST: topGroups");

734

for (SearchGroup<String> searchGroup : topGroups) {

735

System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));

736

}

737

}

738

739

// Get 2nd pass grouped result:

740

final TermSecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);

741

if (doCache) {

742

if (cCache.isCached()) {

743

if (VERBOSE) {

744

System.out.println("TEST: cache is intact");

745

}

746

cCache.replay(c2);

747

} else {

748

if (VERBOSE) {

749

System.out.println("TEST: cache was too large");

750

}

751

s.search(query, c2);

752

}

753

} else {

754

s.search(query, c2);

755

}

756

757

if (doAllGroups) {

758

TopGroups<String> tempTopGroups = c2.getTopGroups(docOffset);

759

groupsResult = new TopGroups<String>(tempTopGroups, allGroupsCollector.getGroupCount());

760

} else {

761

groupsResult = c2.getTopGroups(docOffset);

762

}

763

} else {

764

groupsResult = null;

765

if (VERBOSE) {

766

System.out.println("TEST: no results");

767

}

768

}

769

770

final TopGroups<String> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);

771

772

if (VERBOSE) {

773

if (expectedGroups == null) {

774

System.out.println("TEST: no expected groups");

775

} else {

776

System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);

777

for(GroupDocs<String> gd : expectedGroups.groups) {

778

System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);

779

for(ScoreDoc sd : gd.scoreDocs) {

780

System.out.println(" id=" + sd.doc + " score=" + sd.score);

781

}

782

}

783

}

784

785

if (groupsResult == null) {

786

System.out.println("TEST: no matched groups");

787

} else {

788

System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);

789

for(GroupDocs<String> gd : groupsResult.groups) {

790

System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);

791

for(ScoreDoc sd : gd.scoreDocs) {

792

System.out.println(" id=" + docIDToID[sd.doc] + " score=" + sd.score);

793

}

794

}

795

796

if (searchIter == 14) {

797

for(int docIDX=0;docIDX<s.maxDoc();docIDX++) {

798

System.out.println("ID=" + docIDToID[docIDX] + " explain=" + s.explain(query, docIDX));

799

}

800

}

801

}

802

}

803

804

assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores);

805

806

// Confirm merged shards match:

807

assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores);

808

if (topGroupsShards != null) {

809

verifyShards(shards.docStarts, topGroupsShards);

810

}

811

812

final boolean needsScores = getScores || getMaxScores || docSort == null;

813

final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, lastDocInBlock);

814

final TermAllGroupsCollector allGroupsCollector2;

815

final Collector c4;

816

if (doAllGroups) {

817

allGroupsCollector2 = new TermAllGroupsCollector("group");

818

c4 = MultiCollector.wrap(c3, allGroupsCollector2);

819

} else {

820

allGroupsCollector2 = null;

821

c4 = c3;

822

}

823

// Get block grouping result:

824

sBlocks.search(query, c4);

825

@SuppressWarnings("unchecked")

826

final TopGroups<String> tempTopGroupsBlocks = c3.getTopGroups(docSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);

827

final TopGroups<String> groupsResultBlocks;

828

if (doAllGroups && tempTopGroupsBlocks != null) {

829

assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());

830

groupsResultBlocks = new TopGroups<String>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());

831

} else {

832

groupsResultBlocks = tempTopGroupsBlocks;

833

}

834

835

// Get shard'd block grouping result:

836

final TopGroups<String> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores);

837

838

if (VERBOSE) {

839

if (groupsResultBlocks == null) {

840

System.out.println("TEST: no block groups");

841

} else {

842

System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);

843

boolean first = true;

844

for(GroupDocs<String> gd : groupsResultBlocks.groups) {

845

System.out.println(" group=" + gd.groupValue + " totalHits=" + gd.totalHits);

846

for(ScoreDoc sd : gd.scoreDocs) {

847

System.out.println(" id=" + docIDToIDBlocks[sd.doc] + " score=" + sd.score);

848

if (first) {

849

System.out.println("explain: " + sBlocks.explain(query, sd.doc));

850

first = false;

851

}

852

}

853

}

854

}

855

}

856

857

// The expected result carries scores from the first index; rewrite them to
// the block index's scores (score2 / scoreMap) before comparing:
if (expectedGroups != null) {

858

// Fixup scores for reader2

859

for (GroupDocs groupDocsHits : expectedGroups.groups) {

860

for(ScoreDoc hit : groupDocsHits.scoreDocs) {

861

final GroupDoc gd = groupDocsByID[hit.doc];

862

assertEquals(gd.id, hit.doc);

863

//System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);

864

hit.score = gd.score2;

865

}

866

}

867

868

final SortField[] sortFields = groupSort.getSort();

869

final Map<Float,Float> termScoreMap = scoreMap.get(searchTerm);

870

for(int groupSortIDX=0;groupSortIDX<sortFields.length;groupSortIDX++) {

871

if (sortFields[groupSortIDX].getType() == SortField.SCORE) {

872

for (GroupDocs groupDocsHits : expectedGroups.groups) {

873

if (groupDocsHits.groupSortValues != null) {

874

//System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));

875

groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);

876

assertNotNull(groupDocsHits.groupSortValues[groupSortIDX]);

877

}

878

}

879

}

880

}

881

882

final SortField[] docSortFields = docSort.getSort();

883

for(int docSortIDX=0;docSortIDX<docSortFields.length;docSortIDX++) {

884

if (docSortFields[docSortIDX].getType() == SortField.SCORE) {

885

for (GroupDocs groupDocsHits : expectedGroups.groups) {

886

for(ScoreDoc _hit : groupDocsHits.scoreDocs) {

887

FieldDoc hit = (FieldDoc) _hit;

888

if (hit.fields != null) {

889

hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);

890

assertNotNull(hit.fields[docSortIDX]);

891

}

892

}

893

}

894

}

895

}

896

}

897

898

assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores);

899

assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores);

900

}

901

s.close();

902

sBlocks.close();

903

} finally {

904

// Drop the field cache entries created by the getInts() calls above:
FieldCache.DEFAULT.purge(r);

905

if (rBlocks != null) {

906

FieldCache.DEFAULT.purge(rBlocks);

907

}

908

}

909

910

r.close();

911

dir.close();

912

913

rBlocks.close();

914

dirBlocks.close();

915

}

916

}

917

918

private void verifyShards(int[] docStarts, TopGroups<String> topGroups) {

919

for(GroupDocs group : topGroups.groups) {

920

for(int hitIDX=0;hitIDX<group.scoreDocs.length;hitIDX++) {

921

final ScoreDoc sd = group.scoreDocs[hitIDX];

922

assertEquals("doc=" + sd.doc + " wrong shard",

923

ReaderUtil.subIndex(sd.doc, docStarts),

924

sd.shardIndex);

925

}

926

}

927

}

928

929

private void assertEquals(Collection<SearchGroup<String>> groups1, Collection<SearchGroup<String>> groups2, boolean doSortValues) {

930

assertEquals(groups1.size(), groups2.size());

931

final Iterator<SearchGroup<String>> iter1 = groups1.iterator();

932

final Iterator<SearchGroup<String>> iter2 = groups2.iterator();

933

934

while(iter1.hasNext()) {

935

assertTrue(iter2.hasNext());

936

937

SearchGroup<String> group1 = iter1.next();

938

SearchGroup<String> group2 = iter2.next();

939

940

assertEquals(group1.groupValue, group2.groupValue);

941

if (doSortValues) {

942

assertArrayEquals(group1.sortValues, group2.sortValues);

943

}

944

}

945

assertFalse(iter2.hasNext());

946

}

947

948

private TopGroups<String> searchShards(IndexSearcher topSearcher, ShardState shardState, Query query, Sort groupSort, Sort docSort, int groupOffset, int topNGroups, int docOffset,

949

int topNDocs, boolean getScores, boolean getMaxScores) throws Exception {

950

951

// TODO: swap in caching, all groups collector here

952

// too...

953

if (VERBOSE) {

954

System.out.println("TEST: " + shardState.subSearchers.length + " shards: " + Arrays.toString(shardState.subSearchers));

955

}

956

// Run 1st pass collector to get top groups per shard

957

final Weight w = topSearcher.createNormalizedWeight(query);

958

final List<Collection<SearchGroup<String>>> shardGroups = new ArrayList<Collection<SearchGroup<String>>>();

959

for(int shardIDX=0;shardIDX<shardState.subSearchers.length;shardIDX++) {

960

final TermFirstPassGroupingCollector c = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);

961

shardState.subSearchers[shardIDX].search(w, c);

962

final Collection<SearchGroup<String>> topGroups = c.getTopGroups(0, true);

963

if (topGroups != null) {

964

if (VERBOSE) {

965

System.out.println(" shard " + shardIDX + " s=" + shardState.subSearchers[shardIDX] + " " + topGroups.size() + " groups:");

966

for(SearchGroup<String> group : topGroups) {

967

System.out.println(" " + groupToString(group.groupValue) + " groupSort=" + Arrays.toString(group.sortValues));

968

}

969

}

970

shardGroups.add(topGroups);

971

}

972

}

973

974

final Collection<SearchGroup<String>> mergedTopGroups = SearchGroup.merge(shardGroups, groupOffset, topNGroups, groupSort);

975

if (VERBOSE) {

976

System.out.println(" merged:");

977

if (mergedTopGroups == null) {

978

System.out.println(" null");

979

} else {

980

for(SearchGroup<String> group : mergedTopGroups) {

981

System.out.println(" " + groupToString(group.groupValue) + " groupSort=" + Arrays.toString(group.sortValues));

982

}

983

}

984

}

985

986

if (mergedTopGroups != null) {

987

988

// Now 2nd pass:

989

@SuppressWarnings("unchecked")

990

final TopGroups<String>[] shardTopGroups = new TopGroups[shardState.subSearchers.length];

991

for(int shardIDX=0;shardIDX<shardState.subSearchers.length;shardIDX++) {

992

final TermSecondPassGroupingCollector c = new TermSecondPassGroupingCollector("group", mergedTopGroups, groupSort, docSort,

993

docOffset + topNDocs, getScores, getMaxScores, true);

994

shardState.subSearchers[shardIDX].search(w, c);

995

shardTopGroups[shardIDX] = c.getTopGroups(0);

996

rebaseDocIDs(groupSort, docSort, shardState.docStarts[shardIDX], shardTopGroups[shardIDX]);

997

}

998

999

return TopGroups.merge(shardTopGroups, groupSort, docSort, docOffset, topNDocs);

1000

} else {

1001

return null;

1002

}

1003

}

1004

1005

private List<Integer> getDocIDSortLocs(Sort sort) {

1006

List<Integer> docFieldLocs = new ArrayList<Integer>();

1007

SortField[] docFields = sort.getSort();

1008

for(int fieldIDX=0;fieldIDX<docFields.length;fieldIDX++) {

1009

if (docFields[fieldIDX].getType() == SortField.DOC) {

1010

docFieldLocs.add(fieldIDX);

1011

}

1012

}

1013

1014

return docFieldLocs;

1015

}

1016

1017

private void rebaseDocIDs(Sort groupSort, Sort docSort, int docBase, TopGroups<String> groups) {

1018

1019

List<Integer> docFieldLocs = getDocIDSortLocs(docSort);

1020

List<Integer> docGroupFieldLocs = getDocIDSortLocs(groupSort);

1021

1022

for(GroupDocs<String> group : groups.groups) {

1023

if (group.groupSortValues != null) {

1024

for(int idx : docGroupFieldLocs) {

1025

group.groupSortValues[idx] = Integer.valueOf(((Integer) group.groupSortValues[idx]).intValue() + docBase);

1026

}

1027

}

1028

1029

for(int hitIDX=0;hitIDX<group.scoreDocs.length;hitIDX++) {

1030

final ScoreDoc sd = group.scoreDocs[hitIDX];

1031

sd.doc += docBase;

1032

if (sd instanceof FieldDoc) {

1033

final FieldDoc fd = (FieldDoc) sd;

1034

if (fd.fields != null) {

1035

for(int idx : docFieldLocs) {

1036

fd.fields[idx] = Integer.valueOf(((Integer) fd.fields[idx]).intValue() + docBase);

1037

}

1038

}

1039

}

1040

}

1041

}

1042

}

1043

1044

private void assertEquals(int[] docIDtoID, TopGroups expected, TopGroups actual, boolean verifyGroupValues, boolean verifyTotalGroupCount, boolean verifySortValues, boolean testScores) {

1045

if (expected == null) {

1046

assertNull(actual);

1047

return;

1048

}

1049

assertNotNull(actual);

1050

1051

assertEquals(expected.groups.length, actual.groups.length);

1052

assertEquals(expected.totalHitCount, actual.totalHitCount);

1053

assertEquals(expected.totalGroupedHitCount, actual.totalGroupedHitCount);

1054

if (expected.totalGroupCount != null && verifyTotalGroupCount) {

1055

assertEquals(expected.totalGroupCount, actual.totalGroupCount);

1056

}

1057

1058

for(int groupIDX=0;groupIDX<expected.groups.length;groupIDX++) {

1059

if (VERBOSE) {

1060

System.out.println(" check groupIDX=" + groupIDX);

1061

}

1062

final GroupDocs expectedGroup = expected.groups[groupIDX];

1063

final GroupDocs actualGroup = actual.groups[groupIDX];

1064

if (verifyGroupValues) {

1065

assertEquals(expectedGroup.groupValue, actualGroup.groupValue);

1066

}

1067

if (verifySortValues) {

1068

assertArrayEquals(expectedGroup.groupSortValues, actualGroup.groupSortValues);

1069

}

1070

1071

// TODO

1072

// assertEquals(expectedGroup.maxScore, actualGroup.maxScore);

1073

assertEquals(expectedGroup.totalHits, actualGroup.totalHits);

1074

1075

final ScoreDoc[] expectedFDs = expectedGroup.scoreDocs;

1076

final ScoreDoc[] actualFDs = actualGroup.scoreDocs;

1077

1078

assertEquals(expectedFDs.length, actualFDs.length);

1079

for(int docIDX=0;docIDX<expectedFDs.length;docIDX++) {

1080

final FieldDoc expectedFD = (FieldDoc) expectedFDs[docIDX];

1081

final FieldDoc actualFD = (FieldDoc) actualFDs[docIDX];

1082

//System.out.println(" actual doc=" + docIDtoID[actualFD.doc] + " score=" + actualFD.score);

1083

assertEquals(expectedFD.doc, docIDtoID[actualFD.doc]);

1084

if (testScores) {

1085

assertEquals(expectedFD.score, actualFD.score, 0.1);

1086

} else {

1087

// TODO: too anal for now

1088

//assertEquals(Float.NaN, actualFD.score);

1089

}

1090

if (verifySortValues) {

1091

assertArrayEquals(expectedFD.fields, actualFD.fields);

1092

}

1093

}

1094

}

1095

}

1096

1097

private static class ShardSearcher {

1098

private final IndexSearcher subSearcher;

1099

1100

public ShardSearcher(IndexReader subReader) {

1101

this.subSearcher = new IndexSearcher(subReader);

1102

}

1103

1104

public void search(Weight weight, Collector collector) throws IOException {

1105

subSearcher.search(weight, null, collector);

1106

}

1107

1108

public TopDocs search(Weight weight, int topN) throws IOException {

1109

return subSearcher.search(weight, null, topN);

1110

}

1111

1112

@Override

1113

public String toString() {

1114

return "ShardSearcher(" + subSearcher + ")";

1115

}

1116

}

1117

}

Older »