/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18
package org.apache.solr.handler.admin;
20
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.analysis.CharFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.analysis.TokenizerFactory;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.luke.FieldFlag;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
/**
 * This handler exposes the internal lucene index. It is inspired by and
 * modeled on Luke, the Lucene Index Browser by Andrzej Bialecki.
 *   http://www.getopt.org/luke/
 * <p>
 * NOTE: the response format is still likely to change. It should be designed so
 * that it works nicely with an XSLT transformation. Until we have a nice
 * XSLT front end for /admin, the format is still open to change.
 * </p>
 *
 * For more documentation see:
 *   http://wiki.apache.org/solr/LukeRequestHandler
 *
 * @version $Id: LukeRequestHandler.java 1201265 2011-11-12 14:09:28Z mikemccand $
 */
82
public class LukeRequestHandler extends RequestHandlerBase
84
private static Logger log = LoggerFactory.getLogger(LukeRequestHandler.class);
86
public static final String NUMTERMS = "numTerms";
87
public static final String DOC_ID = "docId";
88
public static final String ID = "id";
89
public static final int DEFAULT_COUNT = 10;
92
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
94
IndexSchema schema = req.getSchema();
95
SolrIndexSearcher searcher = req.getSearcher();
96
IndexReader reader = searcher.getReader();
97
SolrParams params = req.getParams();
98
int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );
100
// Always show the core lucene info
101
rsp.add("index", getIndexInfo(reader, numTerms>0 ) );
103
Integer docId = params.getInt( DOC_ID );
104
if( docId == null && params.get( ID ) != null ) {
105
// Look for something with a given solr ID
106
SchemaField uniqueKey = schema.getUniqueKeyField();
107
String v = uniqueKey.getType().toInternal( params.get(ID) );
108
Term t = new Term( uniqueKey.getName(), v );
109
docId = searcher.getFirstMatch( t );
111
throw new SolrException( SolrException.ErrorCode.NOT_FOUND, "Can't find document: "+params.get( ID ) );
115
// Read the document from the index
116
if( docId != null ) {
119
doc = reader.document( docId );
121
catch( Exception ex ) {}
123
throw new SolrException( SolrException.ErrorCode.NOT_FOUND, "Can't find document: "+docId );
126
SimpleOrderedMap<Object> info = getDocumentFieldsInfo( doc, docId, reader, schema );
128
SimpleOrderedMap<Object> docinfo = new SimpleOrderedMap<Object>();
129
docinfo.add( "docId", docId );
130
docinfo.add( "lucene", info );
131
docinfo.add( "solr", doc );
132
rsp.add( "doc", docinfo );
134
else if ( "schema".equals( params.get( "show" ) ) ) {
135
rsp.add( "schema", getSchemaInfo( req.getSchema() ) );
138
// If no doc is given, show all fields and top terms
139
Set<String> fields = null;
140
if( params.get( CommonParams.FL ) != null ) {
141
fields = new HashSet<String>();
142
for( String f : params.getParams( CommonParams.FL ) ) {
146
rsp.add( "fields", getIndexedFieldsInfo( searcher, fields, numTerms ) ) ;
149
// Add some generally helpful information
150
NamedList<Object> info = new SimpleOrderedMap<Object>();
151
info.add( "key", getFieldFlagsKey() );
152
info.add( "NOTE", "Document Frequency (df) is not updated when a document is marked for deletion. df values include deleted documents." );
153
rsp.add( "info", info );
154
rsp.setHttpCaching(false);
160
* @return a string representing a Fieldable's flags.
162
private static String getFieldFlags( Fieldable f )
164
StringBuilder flags = new StringBuilder();
165
flags.append( (f != null && f.isIndexed()) ? FieldFlag.INDEXED.getAbbreviation() : '-' );
166
flags.append( (f != null && f.isTokenized()) ? FieldFlag.TOKENIZED.getAbbreviation() : '-' );
167
flags.append( (f != null && f.isStored()) ? FieldFlag.STORED.getAbbreviation() : '-' );
168
flags.append( (false) ? FieldFlag.MULTI_VALUED.getAbbreviation() : '-' ); // SchemaField Specific
169
flags.append( (f != null && f.isTermVectorStored()) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
170
flags.append( (f != null && f.isStoreOffsetWithTermVector()) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
171
flags.append( (f != null && f.isStorePositionWithTermVector()) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
172
flags.append( (f != null && f.getOmitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
173
flags.append( (f != null && f.isLazy()) ? FieldFlag.LAZY.getAbbreviation() : '-' );
174
flags.append( (f != null && f.isBinary()) ? FieldFlag.BINARY.getAbbreviation() : '-' );
175
flags.append( (false) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' ); // SchemaField Specific
176
flags.append( (false) ? FieldFlag.SORT_MISSING_LAST.getAbbreviation() : '-' ); // SchemaField Specific
177
return flags.toString();
181
* @return a string representing a SchemaField's flags.
183
private static String getFieldFlags( SchemaField f )
185
FieldType t = (f==null) ? null : f.getType();
187
// see: http://www.nabble.com/schema-field-properties-tf3437753.html#a9585549
188
boolean lazy = false; // "lazy" is purely a property of reading fields
189
boolean binary = false; // Currently not possible
191
StringBuilder flags = new StringBuilder();
192
flags.append( (f != null && f.indexed()) ? FieldFlag.INDEXED.getAbbreviation() : '-' );
193
flags.append( (t != null && t.isTokenized()) ? FieldFlag.TOKENIZED.getAbbreviation() : '-' );
194
flags.append( (f != null && f.stored()) ? FieldFlag.STORED.getAbbreviation() : '-' );
195
flags.append( (f != null && f.multiValued()) ? FieldFlag.MULTI_VALUED.getAbbreviation() : '-' );
196
flags.append( (f != null && f.storeTermVector() ) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
197
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
198
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
199
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
200
flags.append( (f != null &&
201
f.omitTermFreqAndPositions() ) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
202
flags.append( (f != null && f.omitPositions() ) ? FieldFlag.OMIT_POSITIONS.getAbbreviation() : '-' );
203
flags.append( (lazy) ? FieldFlag.LAZY.getAbbreviation() : '-' );
204
flags.append( (binary) ? FieldFlag.BINARY.getAbbreviation() : '-' );
205
flags.append( (f != null && f.sortMissingFirst() ) ? FieldFlag.SORT_MISSING_FIRST.getAbbreviation() : '-' );
206
flags.append( (f != null && f.sortMissingLast() ) ? FieldFlag.SORT_MISSING_LAST.getAbbreviation() : '-' );
207
return flags.toString();
211
* @return a key to what each character means
213
public static SimpleOrderedMap<String> getFieldFlagsKey()
215
SimpleOrderedMap<String> key = new SimpleOrderedMap<String>();
216
key.add(String.valueOf(FieldFlag.INDEXED.getAbbreviation()), FieldFlag.INDEXED.getDisplay() );
217
key.add(String.valueOf(FieldFlag.TOKENIZED.getAbbreviation()), FieldFlag.TOKENIZED.getDisplay() );
218
key.add( String.valueOf(FieldFlag.STORED.getAbbreviation()), FieldFlag.STORED.getDisplay() );
219
key.add( String.valueOf(FieldFlag.MULTI_VALUED.getAbbreviation()), FieldFlag.MULTI_VALUED.getDisplay() );
220
key.add( String.valueOf(FieldFlag.TERM_VECTOR_STORED.getAbbreviation()), FieldFlag.TERM_VECTOR_STORED.getDisplay() );
221
key.add( String.valueOf(FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation()), FieldFlag.TERM_VECTOR_OFFSET.getDisplay() );
222
key.add( String.valueOf(FieldFlag.TERM_VECTOR_POSITION.getAbbreviation()), FieldFlag.TERM_VECTOR_POSITION.getDisplay() );
223
key.add( String.valueOf(FieldFlag.OMIT_NORMS.getAbbreviation()), FieldFlag.OMIT_NORMS.getDisplay() );
224
key.add( String.valueOf(FieldFlag.LAZY.getAbbreviation()), FieldFlag.LAZY.getDisplay() );
225
key.add( String.valueOf(FieldFlag.BINARY.getAbbreviation()), FieldFlag.BINARY.getDisplay() );
226
key.add( String.valueOf(FieldFlag.SORT_MISSING_FIRST.getAbbreviation()), FieldFlag.SORT_MISSING_FIRST.getDisplay() );
227
key.add( String.valueOf(FieldFlag.SORT_MISSING_LAST.getAbbreviation()), FieldFlag.SORT_MISSING_LAST.getDisplay() );
231
private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader, IndexSchema schema ) throws IOException
233
SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
234
for( Object o : doc.getFields() ) {
235
Fieldable fieldable = (Fieldable)o;
236
SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
238
SchemaField sfield = schema.getFieldOrNull( fieldable.name() );
239
FieldType ftype = (sfield==null)?null:sfield.getType();
241
f.add( "type", (ftype==null)?null:ftype.getTypeName() );
242
f.add( "schema", getFieldFlags( sfield ) );
243
f.add( "flags", getFieldFlags( fieldable ) );
245
Term t = new Term(fieldable.name(), ftype!=null ? ftype.storedToIndexed(fieldable) : fieldable.stringValue());
247
f.add( "value", (ftype==null)?null:ftype.toExternal( fieldable ) );
249
// TODO: this really should be "stored"
250
f.add( "internal", fieldable.stringValue() ); // may be a binary number
252
byte[] arr = fieldable.getBinaryValue();
254
f.add( "binary", Base64.byteArrayToBase64(arr, 0, arr.length));
256
f.add( "boost", fieldable.getBoost() );
257
f.add( "docFreq", t.text()==null ? 0 : reader.docFreq( t ) ); // this can be 0 for non-indexed fields
259
// If we have a term vector, return that
260
if( fieldable.isTermVectorStored() ) {
262
TermFreqVector v = reader.getTermFreqVector( docId, fieldable.name() );
264
SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
265
for( int i=0; i<v.size(); i++ ) {
266
tfv.add( v.getTerms()[i], v.getTermFrequencies()[i] );
268
f.add( "termVector", tfv );
271
catch( Exception ex ) {
272
log.warn( "error writing term vector", ex );
276
finfo.add( fieldable.name(), f );
281
@SuppressWarnings("unchecked")
282
private static SimpleOrderedMap<Object> getIndexedFieldsInfo(
283
final SolrIndexSearcher searcher, final Set<String> fields, final int numTerms )
286
IndexReader reader = searcher.getReader();
287
IndexSchema schema = searcher.getSchema();
289
// Walk the term enum and keep a priority queue for each map in our set
290
Map<String,TopTermQueue> ttinfo = null;
292
ttinfo = getTopTerms(reader, fields, numTerms, null );
294
SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
295
Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
296
for (String fieldName : fieldNames) {
297
if( fields != null && !fields.contains( fieldName ) ) {
298
continue; // if a field is specified, only them
301
SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
303
SchemaField sfield = schema.getFieldOrNull( fieldName );
304
FieldType ftype = (sfield==null)?null:sfield.getType();
306
f.add( "type", (ftype==null)?null:ftype.getTypeName() );
307
f.add( "schema", getFieldFlags( sfield ) );
308
if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) {
309
f.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
312
// If numTerms==0, the call is just asking for a quick field list
313
if( ttinfo != null && sfield != null && sfield.indexed() ) {
314
Query q = new TermRangeQuery(fieldName,null,null,false,false);
315
TopDocs top = searcher.search( q, 1 );
316
if( top.totalHits > 0 ) {
317
// Find a document with this field
319
Document doc = searcher.doc( top.scoreDocs[0].doc );
320
Fieldable fld = doc.getFieldable( fieldName );
322
f.add( "index", getFieldFlags( fld ) );
325
// it is a non-stored field...
326
f.add( "index", "(unstored field)" );
329
catch( Exception ex ) {
330
log.warn( "error reading field: "+fieldName );
333
f.add( "docs", top.totalHits );
335
TopTermQueue topTerms = ttinfo.get( fieldName );
336
if( topTerms != null ) {
337
f.add( "distinct", topTerms.distinctTerms );
340
f.add( "topTerms", topTerms.toNamedList( searcher.getSchema() ) );
343
f.add( "histogram", topTerms.histogram.toNamedList() );
348
finfo.add( fieldName, f );
354
* Return info from the index
356
private static SimpleOrderedMap<Object> getSchemaInfo( IndexSchema schema ) {
357
Map<String, List<String>> typeusemap = new HashMap<String, List<String>>();
358
SimpleOrderedMap<Object> fields = new SimpleOrderedMap<Object>();
359
SchemaField uniqueField = schema.getUniqueKeyField();
360
for( SchemaField f : schema.getFields().values() ) {
361
populateFieldInfo(schema, typeusemap, fields, uniqueField, f);
364
SimpleOrderedMap<Object> dynamicFields = new SimpleOrderedMap<Object>();
365
for (SchemaField f : schema.getDynamicFieldPrototypes()) {
366
populateFieldInfo(schema, typeusemap, dynamicFields, uniqueField, f);
368
SimpleOrderedMap<Object> types = new SimpleOrderedMap<Object>();
369
for( FieldType ft : schema.getFieldTypes().values() ) {
370
SimpleOrderedMap<Object> field = new SimpleOrderedMap<Object>();
371
field.add("fields", typeusemap.get( ft.getTypeName() ) );
372
field.add("tokenized", ft.isTokenized() );
373
field.add("className", ft.getClass().getName());
374
field.add("indexAnalyzer", getAnalyzerInfo(ft.getAnalyzer()));
375
field.add("queryAnalyzer", getAnalyzerInfo(ft.getQueryAnalyzer()));
376
types.add( ft.getTypeName(), field );
379
SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
380
finfo.add("fields", fields);
381
finfo.add("dynamicFields", dynamicFields);
382
finfo.add("uniqueKeyField",
383
null == uniqueField ? null : uniqueField.getName());
384
finfo.add("defaultSearchField", schema.getDefaultSearchFieldName());
385
finfo.add("types", types);
390
private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) {
391
SimpleOrderedMap<Object> aninfo = new SimpleOrderedMap<Object>();
392
aninfo.add("className", analyzer.getClass().getName());
393
if (analyzer instanceof TokenizerChain) {
395
TokenizerChain tchain = (TokenizerChain)analyzer;
397
CharFilterFactory[] cfiltfacs = tchain.getCharFilterFactories();
398
SimpleOrderedMap<Map<String, Object>> cfilters = new SimpleOrderedMap<Map<String, Object>>();
399
for (CharFilterFactory cfiltfac : cfiltfacs) {
400
Map<String, Object> tok = new HashMap<String, Object>();
401
String className = cfiltfac.getClass().getName();
402
tok.put("className", className);
403
tok.put("args", cfiltfac.getArgs());
404
cfilters.add(className.substring(className.lastIndexOf('.')+1), tok);
406
if (cfilters.size() > 0) {
407
aninfo.add("charFilters", cfilters);
410
SimpleOrderedMap<Object> tokenizer = new SimpleOrderedMap<Object>();
411
TokenizerFactory tfac = tchain.getTokenizerFactory();
412
tokenizer.add("className", tfac.getClass().getName());
413
tokenizer.add("args", tfac.getArgs());
414
aninfo.add("tokenizer", tokenizer);
416
TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
417
SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<Map<String, Object>>();
418
for (TokenFilterFactory filtfac : filtfacs) {
419
Map<String, Object> tok = new HashMap<String, Object>();
420
String className = filtfac.getClass().getName();
421
tok.put("className", className);
422
tok.put("args", filtfac.getArgs());
423
filters.add(className.substring(className.lastIndexOf('.')+1), tok);
425
if (filters.size() > 0) {
426
aninfo.add("filters", filters);
432
private static void populateFieldInfo(IndexSchema schema,
433
Map<String, List<String>> typeusemap, SimpleOrderedMap<Object> fields,
434
SchemaField uniqueField, SchemaField f) {
435
FieldType ft = f.getType();
436
SimpleOrderedMap<Object> field = new SimpleOrderedMap<Object>();
437
field.add( "type", ft.getTypeName() );
438
field.add( "flags", getFieldFlags(f) );
439
if( f.isRequired() ) {
440
field.add( "required", f.isRequired() );
442
if( f.getDefaultValue() != null ) {
443
field.add( "default", f.getDefaultValue() );
445
if (f == uniqueField){
446
field.add("uniqueKey", true);
448
if (ft.getAnalyzer().getPositionIncrementGap(f.getName()) != 0) {
449
field.add("positionIncrementGap", ft.getAnalyzer().getPositionIncrementGap(f.getName()));
451
field.add("copyDests", schema.getCopyFields(f.getName()));
452
field.add("copySources", schema.getCopySources(f.getName()));
455
fields.add( f.getName(), field );
457
List<String> v = typeusemap.get( ft.getTypeName() );
459
v = new ArrayList<String>();
461
v.add( f.getName() );
462
typeusemap.put( ft.getTypeName(), v );
465
public static SimpleOrderedMap<Object> getIndexInfo( IndexReader reader, boolean countTerms ) throws IOException {
466
Directory dir = reader.directory();
467
SimpleOrderedMap<Object> indexInfo = new SimpleOrderedMap<Object>();
469
indexInfo.add("numDocs", reader.numDocs());
470
indexInfo.add("maxDoc", reader.maxDoc());
480
indexInfo.add("numTerms", numTerms );
483
if( te != null ) te.close();
487
indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
488
indexInfo.add("segmentCount", reader.getSequentialSubReaders().length);
489
indexInfo.add("current", reader.isCurrent() );
490
indexInfo.add("hasDeletions", reader.hasDeletions() );
491
indexInfo.add("directory", dir );
492
indexInfo.add("lastModified", new Date(IndexReader.lastModified(dir)) );
496
//////////////////////// SolrInfoMBeans methods //////////////////////
499
public String getDescription() {
500
return "Lucene Index Browser. Inspired and modeled after Luke: http://www.getopt.org/luke/";
504
public String getVersion() {
505
return "$Revision: 1201265 $";
509
public String getSourceId() {
510
return "$Id: LukeRequestHandler.java 1201265 2011-11-12 14:09:28Z mikemccand $";
514
public String getSource() {
515
return "$URL: http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_3_5_0/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java $";
519
public URL[] getDocs() {
521
return new URL[] { new URL("http://wiki.apache.org/solr/LukeRequestHandler") };
523
catch( MalformedURLException ex ) { return null; }
526
///////////////////////////////////////////////////////////////////////////////////////
528
static class TermHistogram
531
public Map<Integer,Integer> hist = new HashMap<Integer, Integer>();
533
private static final double LOG2 = Math.log( 2 );
534
public static int getPowerOfTwoBucket( int num )
536
return Math.max(1, Integer.highestOneBit(num-1) << 1);
539
public void add( int df )
541
Integer bucket = getPowerOfTwoBucket( df );
542
if( bucket > maxBucket ) {
545
Integer old = hist.get( bucket );
547
hist.put( bucket, 1 );
550
hist.put( bucket, old+1 );
554
// TODO? should this be a list or a map?
555
public NamedList<Integer> toNamedList()
557
NamedList<Integer> nl = new NamedList<Integer>();
558
for( int bucket = 1; bucket <= maxBucket; bucket *= 2 ) {
559
Integer val = hist.get( bucket );
563
nl.add( ""+bucket, val );
570
* Private internal class that counts up frequent terms
572
private static class TopTermQueue extends PriorityQueue
574
static class TermInfo {
575
TermInfo(Term t, int df) {
583
public int minFreq = 0;
584
public int distinctTerms = 0;
585
public TermHistogram histogram;
587
TopTermQueue(int size) {
589
histogram = new TermHistogram();
593
protected final boolean lessThan(Object a, Object b) {
594
TermInfo termInfoA = (TermInfo)a;
595
TermInfo termInfoB = (TermInfo)b;
596
return termInfoA.docFreq < termInfoB.docFreq;
600
* This is a destructive call... the queue is empty at the end
602
public NamedList<Integer> toNamedList( IndexSchema schema )
604
// reverse the list..
605
List<TermInfo> aslist = new LinkedList<TermInfo>();
606
while( size() > 0 ) {
607
aslist.add( 0, (TermInfo)pop() );
610
NamedList<Integer> list = new NamedList<Integer>();
611
for (TermInfo i : aslist) {
612
String txt = i.term.text();
613
SchemaField ft = schema.getFieldOrNull( i.term.field() );
615
txt = ft.getType().indexedToReadable( txt );
617
list.add( txt, i.docFreq );
623
private static Map<String,TopTermQueue> getTopTerms( IndexReader reader, Set<String> fields, int numTerms, Set<String> junkWords ) throws Exception
625
Map<String,TopTermQueue> info = new HashMap<String, TopTermQueue>();
627
TermEnum terms = null;
629
terms = reader.terms();
630
while (terms.next()) {
631
String field = terms.term().field();
632
String t = terms.term().text();
634
// Compute distinct terms for every field
635
TopTermQueue tiq = info.get( field );
637
tiq = new TopTermQueue( numTerms+1 );
638
info.put( field, tiq );
641
tiq.histogram.add( terms.docFreq() ); // add the term to the histogram
643
// Only save the distinct terms for fields we worry about
644
if (fields != null && fields.size() > 0) {
645
if( !fields.contains( field ) ) {
649
if( junkWords != null && junkWords.contains( t ) ) {
653
if( terms.docFreq() > tiq.minFreq ) {
654
tiq.add(new TopTermQueue.TermInfo(terms.term(), terms.docFreq()));
655
if (tiq.size() > numTerms) { // if tiq full
656
tiq.pop(); // remove lowest in tiq
657
tiq.minFreq = ((TopTermQueue.TermInfo)tiq.top()).docFreq; // reset minFreq
663
if( terms != null ) terms.close();