2
* @(#)Search.java 1.17 06/10/30
4
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
5
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7
* This code is free software; you can redistribute it and/or modify it
8
* under the terms of the GNU General Public License version 2 only, as
9
* published by the Free Software Foundation. Sun designates this
10
* particular file as subject to the "Classpath" exception as provided
11
* by Sun in the LICENSE file that accompanied this code.
13
* This code is distributed in the hope that it will be useful, but WITHOUT
14
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16
* version 2 for more details (a copy is included in the LICENSE file that
17
* accompanied this code).
19
* You should have received a copy of the GNU General Public License version
20
* 2 along with this work; if not, write to the Free Software Foundation,
21
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
23
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
24
* CA 95054 USA or visit www.sun.com if you need additional information or
30
* @author Jacek R. Ambroziak
31
* @group Sun Microsystems Laboratories
34
package com.sun.java.help.search;
37
import javax.help.search.SearchQuery;
41
private static final int InitNConcepts = 128;
43
private SearchEnvironment _env;
45
private int _nConcepts;
46
private int _nQueries;
47
private ConceptGroupGenerator _firstGenerator = new ConceptGroupGenerator();
48
private int[] _concepts = new int[DocumentCompressor.NConceptsInGroup];
51
private int _startingIndex = 0;
52
private int _limit = 0;
53
private Query[] _query;
54
private ConceptData[] _conceptData;
55
private GeneratorHeap _genHeap = new GeneratorHeap();
56
private int _document;
57
private byte[] _data = null;
58
private int _base = 0; // index into _data
59
private NextDocGeneratorHeap _nextDocGenHeap = new NextDocGeneratorHeap();
60
private IntegerArray _kTable = new IntegerArray();
61
private IntegerArray _offsets = new IntegerArray();
62
private IntegerArray _maxConcepts = new IntegerArray();
64
private IntegerArray _docConcepts = new IntegerArray();
65
private IntegerArray _queryMasks = new IntegerArray();
66
private int _maxHitsToShow = 100;
68
public Search(SearchEnvironment se, int nColumns)
72
_query = new Query[_nQueries];
74
_size2 = InitNConcepts;
76
_conceptData = new ConceptData[_size2];
78
_query[0] = new Query(se, nColumns, null);
80
_query[1] = new Query(se, 3, null);
81
_query[2] = new Query(se, 3, null);
82
_query[3] = new Query(se, 3, null);
86
public void addTerm(int col, int concept, double score, int query)
88
if (_env.occursInText(concept))
92
ConceptData[] newArray = new ConceptData[_size2 *= 2];
93
System.arraycopy(_conceptData, 0, newArray, 0, _free2);
94
_conceptData = newArray;
96
_conceptData[_free2++] =
97
new ConceptData(concept, col, score, query,
98
_query[query].getNColumns());
102
public void startSearch(SearchQuery searchQuery)
104
// fprintf(stderr, "startSearch: setup\n");
106
// set up ConceptData lists
107
// order search terms
108
quicksort(0, _free2 - 1);
110
for (i = 0; i < _free2 - 1; i = j)
111
for (j = i + 1; j < _free2; j++)
112
if (_conceptData[i].crqEquals(_conceptData[j]))
113
_conceptData[j] = null;
117
for (i = 0; i < _free2 - 1; i = j)
118
for (j = i + 1; j < _free2; j++)
119
if (_conceptData[j] != null)
120
if (_conceptData[i].cEquals(_conceptData[j]))
122
_conceptData[i].addLast(_conceptData[j]);
123
_conceptData[j] = null;
128
for (i = 0; i < _free2 - 1; i++)
129
if (_conceptData[i] == null)
130
for (j = i + 1; j < _free2; j++)
131
if (_conceptData[j] != null)
133
_conceptData[i] = _conceptData[j];
134
_conceptData[j] = null;
137
// set up new document generators
138
_nextDocGenHeap.reset();
139
for (i = 0; i < _free2 && _conceptData[i] != null; i++)
141
NextDocGenerator gen = new NextDocGenerator(_conceptData[i], _env);
144
if (gen.getDocument() != NonnegativeIntegerGenerator.END)
147
setConceptLength(_env.
148
getConceptLength(_conceptData[i].getConcept()));
149
_nextDocGenHeap.addGenerator(gen);
152
catch (Exception e) {
156
_nextDocGenHeap.start();
158
if (searchQuery == null) {
159
printResults(_maxHitsToShow);
161
_query[0].makeEvent(_maxHitsToShow, searchQuery);
165
private void searchDocument()
167
RoleFiller[] start = new RoleFiller[_nQueries];
170
switch (nextDocument(start))
172
case 0: // multi group
173
_genHeap.start(start);
174
while (_genHeap.next(start))
178
case 1: // single group
179
if (_firstGenerator.next())
181
_firstGenerator.generateFillers(start);
182
while (_firstGenerator.next())
183
_firstGenerator.generateFillers(start);
187
case 2: // reached the end
191
catch (Exception e) {
192
e.printStackTrace(System.err);
196
for (int i = 0; i < _nQueries; i++)
199
if ((next = start[i]) != null && next != RoleFiller.STOP)
200
next.scoreList(_query[i], _document);
205
while (_nextDocGenHeap.isNonEmpty());
208
// will be called for increasing values of c
209
// searches index interval [_startingIndex, _nConcepts]
210
// saves state in _startingIndex
211
private int indexOf(int concept) throws Exception
213
int i = _startingIndex, j = _nConcepts, k;
215
if (_concepts[k = (i + j)/2] < concept)
217
else if (concept < _concepts[k])
221
_startingIndex = k + 1;
224
throw new Exception("indexOf " + concept + " not found");
227
private void printResults(int nHits)
229
for (int q = 0; q < _nQueries; q++)
231
System.out.println("query " + q);
232
if (_query[q] != null)
233
_query[q].printHits(nHits);
237
private ConceptGroupGenerator makeGenerator(int group) throws Exception
243
index = _base + _offsets.at(group - 1);
244
shift = _maxConcepts.at(group - 1);
252
// initialize generator
253
ConceptGroupGenerator gen =
254
new ConceptGroupGenerator(_data, index, _kTable.at(2*group + 1));
255
// decode concept table
256
_nConcepts = gen.decodeConcepts(_kTable.at(2*group), shift, _concepts);
258
_max = _concepts[_nConcepts] = _maxConcepts.at(group);
260
_max = _concepts[_nConcepts - 1];
261
_genHeap.addGenerator(gen);
262
_startingIndex = 0; // in _concepts; lower search index
266
// returns true if multigroup
267
private boolean openDocumentIndex(int docID) throws Exception
269
// The data is only the data for this document id. Thus the base is set
271
_data = _env.getPositions(docID);
274
int kk = _data[_base] & 0xFF, k2;
275
switch (kk >> 6) // get type
277
case 0: // single group, no extents
278
k2 = _data[_base + 1];
279
_firstGenerator.init(_data, _base += 2, k2);
280
// decode concept table
281
_nConcepts = _firstGenerator.decodeConcepts(kk & 0x3F, 0, _concepts);
284
case 2: // multi group, no extents
287
_maxConcepts.clear();
288
ByteArrayDecompressor compr =
289
new ByteArrayDecompressor(_data, _base + 1);
290
compr.decode(kk & 0x3F, _kTable);
291
compr.ascDecode(_kTable.popLast(), _offsets);
292
compr.ascDecode(_kTable.popLast(), _maxConcepts);
293
_base += 1 + compr.bytesRead();
294
_limit = _maxConcepts.cardinality();
297
case 1: // single group, extents
298
case 3: // multi group, extents
299
throw new Exception("extents not yet implemented\n");
304
private int nextDocument(RoleFiller[] start) throws Exception
306
while (_nextDocGenHeap.isNonEmpty()) // still something to do
308
for (int i = 0; i < _nQueries; i++)
309
if (_query[i] != null)
310
_query[i].resetForNextDocument();
312
// gather all concepts this document has and store associated conceptData
314
_document = _nextDocGenHeap.getDocument();
315
_docConcepts.clear();
318
_docConcepts.add(_nextDocGenHeap.getConcept());
319
_queryMasks.add(_nextDocGenHeap.getQueryMask());
320
(_conceptData[index++] = _nextDocGenHeap.getTerms()).runBy(_query);
321
_nextDocGenHeap.step();
323
while (_nextDocGenHeap.atDocument(_document));
325
// if there is no saturation model, some query will always vote YES
326
// and so every document will be opened
327
// even if this case, however, savings can be achieved by not generating fillers
328
// for some queries (not scoring, etc)
329
// and, with more care, creation of some GroupGenerators can be avoided
330
// saturating queries with lots of good hits will lead to best results
332
for (int i = 0; i < _nQueries; i++)
333
if (_query[i] != null)
334
if (_query[i].vote())
336
start[i] = null; // normal reset
340
start[i] = RoleFiller.STOP; // prohibit setting
342
// we may eliminate some ConceptGroupGenerators
343
// those which would be used only by Queries which voted NO
344
if (voteMask != 0) // need to open up document
346
ConceptGroupGenerator gen;
347
// !!! don't gather Fillers for disinterested Queries
348
if (openDocumentIndex(_document)) // multi group
350
// set up all needed generators
352
while ((_queryMasks.at(i) & voteMask) == 0)
354
// assert(i < index);
355
int c = _docConcepts.at(i);
358
while (c > _maxConcepts.at(group) && ++group < _limit)
360
gen = makeGenerator(group);
361
gen.addTerms(indexOf(c), _conceptData[i]);
363
for (++i; i < index; i++)
364
if ((_queryMasks.at(i) & voteMask) > 0)
366
c = _docConcepts.at(i);
367
if (c > _max) // need to find another group
369
// assert(group < _limit);
370
while (c > _maxConcepts.at(group) && ++group < _limit)
372
gen = makeGenerator(group);
374
gen.addTerms(indexOf(c), _conceptData[i]);
380
for (int i = 0; i < index; i++)
381
if ((_queryMasks.at(i) & voteMask) != 0)
382
_firstGenerator.addTerms(indexOf(_docConcepts.at(i)),
391
// part of quicksearch
392
private int partition(int p, int r)
394
ConceptData x = _conceptData[p];
395
int i = p - 1, j = r + 1;
398
while (x.compareWith(_conceptData[--j]))
400
while (_conceptData[++i].compareWith(x))
404
ConceptData t = _conceptData[i];
405
_conceptData[i] = _conceptData[j];
413
private void quicksort(int p, int r)
417
int q = partition(p, r);