2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
17
package org.apache.solr.schema;
19
import org.apache.lucene.document.Fieldable;
20
import org.apache.lucene.document.Field;
21
import org.apache.lucene.document.NumericField;
22
import org.apache.lucene.index.FieldInfo.IndexOptions;
23
import org.apache.lucene.search.*;
24
import org.apache.lucene.util.NumericUtils;
25
import org.apache.lucene.analysis.TokenStream;
26
import org.apache.lucene.analysis.NumericTokenStream;
27
import org.apache.solr.analysis.*;
28
import org.apache.solr.common.SolrException;
29
import org.apache.solr.response.TextResponseWriter;
30
import org.apache.solr.response.XMLWriter;
31
import org.apache.solr.search.QParser;
32
import org.apache.solr.search.function.*;
34
import java.io.IOException;
35
import java.util.Locale;
37
import java.util.Date;
40
* Provides field types that support Lucene's {@link NumericField}.
41
* See {@link org.apache.lucene.search.NumericRangeQuery} for more details.
42
* It supports integer, float, long, double and date types.
44
* For each number being added to this field, multiple terms are generated as per the algorithm described in the above
45
* link. The possible number of terms increases dramatically with lower precision steps. For
46
* the fast range search to work, trie fields must be indexed.
48
* Trie fields are sortable in numerical order and can be used in function queries.
50
* Note that if you use a precisionStep of 32 for int/float and 64 for long/double/date, then multiple terms will not be
51
* generated, range search will be no faster than any other number field, but sorting will still be possible.
53
* @version $Id: TrieField.java 1201479 2011-11-13 19:11:52Z erick $
54
* @see org.apache.lucene.search.NumericRangeQuery
57
public class TrieField extends FieldType {
58
public static final int DEFAULT_PRECISION_STEP = 8;
60
protected int precisionStepArg = TrieField.DEFAULT_PRECISION_STEP; // the one passed in or defaulted
61
protected int precisionStep; // normalized
62
protected TrieTypes type;
63
protected Object missingValue;
67
* Used for handling date types following the same semantics as DateField
69
static final DateField dateField = new DateField();
72
protected void init(IndexSchema schema, Map<String, String> args) {
73
String p = args.remove("precisionStep");
75
precisionStepArg = Integer.parseInt(p);
77
// normalize the precisionStep
78
precisionStep = precisionStepArg;
79
if (precisionStep<=0 || precisionStep>=64) precisionStep=Integer.MAX_VALUE;
80
String t = args.remove("type");
84
type = TrieTypes.valueOf(t.toUpperCase(Locale.ENGLISH));
85
} catch (IllegalArgumentException e) {
86
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
87
"Invalid type specified in schema.xml for field: " + args.get("name"), e);
92
CharFilterFactory[] filterFactories = new CharFilterFactory[0];
93
TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
94
analyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, precisionStep), tokenFilterFactories);
95
// for query time we only need one token, so we use the biggest possible precisionStep:
96
queryAnalyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, Integer.MAX_VALUE), tokenFilterFactories);
100
public Object toObject(Fieldable f) {
101
if (f instanceof NumericField) {
102
final Number val = ((NumericField) f).getNumericValue();
103
if (val==null) return badFieldString(f);
104
return (type == TrieTypes.DATE) ? new Date(val.longValue()) : val;
106
// the following code is "deprecated" and only to support pre-3.2 indexes using the old BinaryField encoding:
107
final byte[] arr = f.getBinaryValue();
108
if (arr==null) return badFieldString(f);
113
return Float.intBitsToFloat(toInt(arr));
117
return Double.longBitsToDouble(toLong(arr));
119
return new Date(toLong(arr));
121
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
127
public SortField getSortField(SchemaField field, boolean top) {
128
field.checkSortability();
130
Object missingValue = null;
131
boolean sortMissingLast = field.sortMissingLast();
132
boolean sortMissingFirst = field.sortMissingFirst();
136
if( sortMissingLast ) {
137
missingValue = top ? Integer.MIN_VALUE : Integer.MAX_VALUE;
139
else if( sortMissingFirst ) {
140
missingValue = top ? Integer.MAX_VALUE : Integer.MIN_VALUE;
142
return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top).setMissingValue(missingValue);
145
if( sortMissingLast ) {
146
missingValue = top ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
148
else if( sortMissingFirst ) {
149
missingValue = top ? Float.POSITIVE_INFINITY : Float.NEGATIVE_INFINITY;
151
return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER, top).setMissingValue(missingValue);
153
case DATE: // fallthrough
155
if( sortMissingLast ) {
156
missingValue = top ? Long.MIN_VALUE : Long.MAX_VALUE;
158
else if( sortMissingFirst ) {
159
missingValue = top ? Long.MAX_VALUE : Long.MIN_VALUE;
161
return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, top).setMissingValue(missingValue);
164
if( sortMissingLast ) {
165
missingValue = top ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
167
else if( sortMissingFirst ) {
168
missingValue = top ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
170
return new SortField( field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, top).setMissingValue(missingValue);
173
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);
178
public ValueSource getValueSource(SchemaField field, QParser qparser) {
179
field.checkFieldCacheSource(qparser);
182
return new IntFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER);
184
return new FloatFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER);
186
return new TrieDateFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER);
188
return new LongFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER);
190
return new DoubleFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER);
192
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);
197
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
198
xmlWriter.writeVal(name, toObject(f));
202
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
203
writer.writeVal(name, toObject(f));
207
public boolean isTokenized() {
212
public boolean multiValuedFieldCache() {
217
* @return the precisionStep used to index values into the field
219
public int getPrecisionStep() {
220
return precisionStepArg;
224
* @return the type of this field
226
public TrieTypes getType() {
231
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
232
int ps = precisionStep;
236
query = NumericRangeQuery.newIntRange(field.getName(), ps,
237
min == null ? null : Integer.parseInt(min),
238
max == null ? null : Integer.parseInt(max),
239
minInclusive, maxInclusive);
242
query = NumericRangeQuery.newFloatRange(field.getName(), ps,
243
min == null ? null : Float.parseFloat(min),
244
max == null ? null : Float.parseFloat(max),
245
minInclusive, maxInclusive);
248
query = NumericRangeQuery.newLongRange(field.getName(), ps,
249
min == null ? null : Long.parseLong(min),
250
max == null ? null : Long.parseLong(max),
251
minInclusive, maxInclusive);
254
query = NumericRangeQuery.newDoubleRange(field.getName(), ps,
255
min == null ? null : Double.parseDouble(min),
256
max == null ? null : Double.parseDouble(max),
257
minInclusive, maxInclusive);
260
query = NumericRangeQuery.newLongRange(field.getName(), ps,
261
min == null ? null : dateField.parseMath(null, min).getTime(),
262
max == null ? null : dateField.parseMath(null, max).getTime(),
263
minInclusive, maxInclusive);
266
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field");
273
static int toInt(byte[] arr) {
274
return (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
278
static long toLong(byte[] arr) {
279
int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff);
280
int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff);
281
return (((long)high)<<32) | (low&0x0ffffffffL);
285
public String storedToReadable(Fieldable f) {
286
return toExternal(f);
290
public String readableToIndexed(String val) {
293
return NumericUtils.intToPrefixCoded(Integer.parseInt(val));
295
return NumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(Float.parseFloat(val)));
297
return NumericUtils.longToPrefixCoded(Long.parseLong(val));
299
return NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(Double.parseDouble(val)));
301
return NumericUtils.longToPrefixCoded(dateField.parseMath(null, val).getTime());
303
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
308
public String toInternal(String val) {
309
return readableToIndexed(val);
313
static String badFieldString(Fieldable f) {
314
String s = f.stringValue();
315
return "ERROR:SCHEMA-INDEX-MISMATCH,stringValue="+s;
319
public String toExternal(Fieldable f) {
320
return (type == TrieTypes.DATE)
321
? dateField.toExternal((Date) toObject(f))
322
: toObject(f).toString();
326
public String indexedToReadable(String indexedForm) {
329
return Integer.toString( NumericUtils.prefixCodedToInt(indexedForm) );
331
return Float.toString( NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(indexedForm)) );
333
return Long.toString( NumericUtils.prefixCodedToLong(indexedForm) );
335
return Double.toString( NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(indexedForm)) );
337
return dateField.toExternal( new Date(NumericUtils.prefixCodedToLong(indexedForm)) );
339
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
344
public String storedToIndexed(Fieldable f) {
345
if (f instanceof NumericField) {
346
final Number val = ((NumericField) f).getNumericValue();
348
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid field contents: "+f.name());
351
return NumericUtils.intToPrefixCoded(val.intValue());
353
return NumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(val.floatValue()));
354
case LONG: //fallthrough!
356
return NumericUtils.longToPrefixCoded(val.longValue());
358
return NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(val.doubleValue()));
360
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
363
// the following code is "deprecated" and only to support pre-3.2 indexes using the old BinaryField encoding:
364
final byte[] arr = f.getBinaryValue();
366
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid field contents: "+f.name());
369
return NumericUtils.intToPrefixCoded(toInt(arr));
371
// WARNING: Code Duplication! Keep in sync with o.a.l.util.NumericUtils!
372
// copied from NumericUtils to not convert to/from float two times
373
// code in next 2 lines is identical to: int v = NumericUtils.floatToSortableInt(Float.intBitsToFloat(toInt(arr)));
375
if (v<0) v ^= 0x7fffffff;
376
return NumericUtils.intToPrefixCoded(v);
378
case LONG: //fallthrough!
380
return NumericUtils.longToPrefixCoded(toLong(arr));
382
// WARNING: Code Duplication! Keep in sync with o.a.l.util.NumericUtils!
383
// copied from NumericUtils to not convert to/from double two times
384
// code in next 2 lines is identical to: long v = NumericUtils.doubleToSortableLong(Double.longBitsToDouble(toLong(arr)));
385
long v = toLong(arr);
386
if (v<0) v ^= 0x7fffffffffffffffL;
387
return NumericUtils.longToPrefixCoded(v);
390
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
396
public Fieldable createField(SchemaField field, String externalVal, float boost) {
397
boolean indexed = field.indexed();
398
boolean stored = field.stored();
400
if (!indexed && !stored) {
401
if (log.isTraceEnabled())
402
log.trace("Ignoring unindexed/unstored field: " + field);
406
final NumericField f = new NumericField(field.getName(), precisionStep, stored ? Field.Store.YES : Field.Store.NO, indexed);
409
f.setIntValue(Integer.parseInt(externalVal));
412
f.setFloatValue(Float.parseFloat(externalVal));
415
f.setLongValue(Long.parseLong(externalVal));
418
f.setDoubleValue(Double.parseDouble(externalVal));
421
f.setLongValue(dateField.parseMath(null, externalVal).getTime());
424
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
427
f.setOmitNorms(field.omitNorms());
428
f.setIndexOptions(getIndexOptions(field, externalVal));
433
public enum TrieTypes {
442
static final String INT_PREFIX = new String(new char[]{NumericUtils.SHIFT_START_INT});
443
static final String LONG_PREFIX = new String(new char[]{NumericUtils.SHIFT_START_LONG});
445
/** expert internal use, subject to change.
446
* Returns null if no prefix or prefix not needed, or the prefix of the main value of a trie field
447
* that indexes multiple precisions per value.
449
public static String getMainValuePrefix(FieldType ft) {
450
if (ft instanceof TrieDateField)
451
ft = ((TrieDateField) ft).wrappedField;
452
if (ft instanceof TrieField) {
453
final TrieField trie = (TrieField)ft;
454
if (trie.precisionStep == Integer.MAX_VALUE)
465
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + trie.type);
472
class TrieDateFieldSource extends LongFieldSource {
474
/**
 * Creates a date value source; both arguments are handed unchanged to the
 * {@code LongFieldSource} constructor.
 *
 * @param field  the field name
 * @param parser the long parser to use
 */
public TrieDateFieldSource(final String field, final FieldCache.LongParser parser) {
  super(field, parser);
}
479
public String description() {
480
return "date(" + field + ')';
484
public long externalToLong(String extVal) {
485
return TrieField.dateField.parseMath(null, extVal).getTime();