1
package org.apache.lucene.document;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.Reader;
22
import org.apache.lucene.analysis.TokenStream;
23
import org.apache.lucene.analysis.NumericTokenStream;
24
import org.apache.lucene.index.FieldInfo.IndexOptions;
25
import org.apache.lucene.util.NumericUtils;
26
import org.apache.lucene.search.NumericRangeQuery; // javadocs
27
import org.apache.lucene.search.NumericRangeFilter; // javadocs
28
import org.apache.lucene.search.SortField; // javadocs
29
import org.apache.lucene.search.FieldCache; // javadocs
32
* <p>This class provides a {@link Field} that enables indexing
33
* of numeric values for efficient range filtering and
34
* sorting. Here's an example usage, adding an int value:
36
* document.add(new NumericField(name).setIntValue(value));
39
* For optimal performance, re-use the
40
* <code>NumericField</code> and {@link Document} instance for more than one document:
44
* NumericField field = new NumericField(name);
45
* Document document = new Document();
46
* document.add(field);
48
* for(all documents) {
50
* field.setIntValue(value);
51
* writer.addDocument(document);
* }
56
* <p>The java native types <code>int</code>, <code>long</code>,
57
* <code>float</code> and <code>double</code> are
58
* directly supported. However, any value that can be
59
* converted into these native types can also be indexed.
60
* For example, date/time values represented by a
61
* {@link java.util.Date} can be translated into a long
62
* value using the {@link java.util.Date#getTime} method. If you
63
* don't need millisecond precision, you can quantize the
64
* value, either by dividing the result of
65
* {@link java.util.Date#getTime} or using the separate getters
66
* (for year, month, etc.) to construct an <code>int</code> or
67
* <code>long</code> value.</p>
69
* <p>To perform range querying or filtering against a
70
* <code>NumericField</code>, use {@link NumericRangeQuery} or {@link
71
* NumericRangeFilter}. To sort according to a
72
* <code>NumericField</code>, use the normal numeric sort types, eg
73
* {@link SortField#INT}. <code>NumericField</code> values
74
* can also be loaded directly from {@link FieldCache}.</p>
76
* <p>By default, a <code>NumericField</code>'s value is not stored but
77
* is indexed for range filtering and sorting. You can use
78
* the {@link #NumericField(String,Field.Store,boolean)}
79
* constructor if you need to change these defaults.</p>
81
* <p>You may add the same field name as a <code>NumericField</code> to
82
* the same document more than once. Range querying and
83
* filtering will be the logical OR of all values; so a range query
84
* will hit all documents that have at least one value in
85
* the range. However sort behavior is not defined. If you need to sort,
86
* you should separately index a single-valued <code>NumericField</code>.</p>
88
* <p>A <code>NumericField</code> will consume somewhat more disk space
89
* in the index than an ordinary single-valued field.
90
* However, for a typical index that includes substantial
91
* textual content per document, this increase will likely
92
* be in the noise. </p>
94
* <p>Within Lucene, each numeric value is indexed as a
95
* <em>trie</em> structure, where each term is logically
96
* assigned to larger and larger pre-defined brackets (which
97
* are simply lower-precision representations of the value).
98
* The step size between each successive bracket is called the
99
* <code>precisionStep</code>, measured in bits. Smaller
100
* <code>precisionStep</code> values result in larger number
101
* of brackets, which consumes more disk space in the index
102
* but may result in faster range search performance. The
103
* default value, 4, was selected for a reasonable tradeoff
104
* of disk space consumption versus performance. You can
105
* use the expert constructor {@link
106
* #NumericField(String,int,Field.Store,boolean)} if you'd
107
* like to change the value. Note that you must also
108
* specify a congruent value when creating {@link
109
* NumericRangeQuery} or {@link NumericRangeFilter}.
110
* For low cardinality fields larger precision steps are good.
111
* If the cardinality is &lt; 100, it is fair
112
* to use {@link Integer#MAX_VALUE}, which produces one term per value.
115
* <p>For more information on the internals of numeric trie
116
* indexing, including the <a
117
* href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
118
* configuration, see {@link NumericRangeQuery}. The format of
119
* indexed values is described in {@link NumericUtils}.
121
* <p>If you only need to sort by numeric value, and never
122
* run range querying/filtering, you can index using a
123
* <code>precisionStep</code> of {@link Integer#MAX_VALUE}.
124
* This will minimize disk space consumed. </p>
126
* <p>More advanced users can instead use {@link
127
* NumericTokenStream} directly, when indexing numbers. This
128
* class is a wrapper around this token stream type for
129
* easier, more intuitive usage.</p>
133
public final class NumericField extends AbstractField {
/**
 * Data type of the value in {@link NumericField}: one constant per
 * primitive type that the set<em>???</em>Value() methods accept.
 */
public enum DataType { INT, LONG, FLOAT, DOUBLE }
140
private transient NumericTokenStream numericTS;
141
private DataType type;
142
private final int precisionStep;
/**
 * Creates a field for numeric values using the default <code>precisionStep</code>
 * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
 * a numeric value; before indexing a document containing this field,
 * set a value using one of the set<em>???</em>Value() methods.
 * This constructor creates an indexed, but not stored field.
 * @param name the field name
 */
public NumericField(String name) {
  this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true);
}
/**
 * Creates a field for numeric values using the default <code>precisionStep</code>
 * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
 * a numeric value; before indexing a document containing this field,
 * set a value using one of the set<em>???</em>Value() methods.
 * @param name the field name
 * @param store if the field should be stored, {@link Document#getFieldable}
 * then returns {@code NumericField} instances on search results.
 * @param index if the field should be indexed using {@link NumericTokenStream}
 */
public NumericField(String name, Field.Store store, boolean index) {
  this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index);
}
/**
 * Creates a field for numeric values with the specified
 * <code>precisionStep</code>. The instance is not yet initialized with
 * a numeric value; before indexing a document containing this field,
 * set a value using one of the set<em>???</em>Value() methods.
 * This constructor creates an indexed, but not stored field.
 * @param name the field name
 * @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
 */
public NumericField(String name, int precisionStep) {
  this(name, precisionStep, Field.Store.NO, true);
}
/**
 * Creates a field for numeric values with the specified
 * <code>precisionStep</code>. The instance is not yet initialized with
 * a numeric value; before indexing a document containing this field,
 * set a value using one of the set<em>???</em>Value() methods.
 * @param name the field name
 * @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
 * @param store if the field should be stored, {@link Document#getFieldable}
 * then returns {@code NumericField} instances on search results.
 * @param index if the field should be indexed using {@link NumericTokenStream}
 */
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
  // When indexed, the field goes through the analysis chain without norms;
  // term vectors are never enabled for numeric fields.
  super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
  this.precisionStep = precisionStep;
  setIndexOptions(IndexOptions.DOCS_ONLY);
}
200
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
201
public TokenStream tokenStreamValue() {
204
if (numericTS == null) {
205
// lazy init the TokenStream as it is heavy to instantiate (attributes,...),
206
// if not needed (stored field loading)
207
numericTS = new NumericTokenStream(precisionStep);
208
// initialize value in TokenStream
209
if (fieldsData != null) {
211
final Number val = (Number) fieldsData;
214
numericTS.setIntValue(val.intValue()); break;
216
numericTS.setLongValue(val.longValue()); break;
218
numericTS.setFloatValue(val.floatValue()); break;
220
numericTS.setDoubleValue(val.doubleValue()); break;
222
assert false : "Should never get here";
229
/** Returns always <code>null</code> for numeric fields */
231
public byte[] getBinaryValue(byte[] result){
235
/** Returns always <code>null</code> for numeric fields */
236
public Reader readerValue() {
240
/** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
241
* on search results. It is recommended to use {@link Document#getFieldable} instead
242
* that returns {@code NumericField} instances. You can then use {@link #getNumericValue}
243
* to return the stored value. */
244
public String stringValue() {
245
return (fieldsData == null) ? null : fieldsData.toString();
248
/** Returns the current numeric value as a subclass of {@link Number}, <code>null</code> if not yet initialized. */
249
public Number getNumericValue() {
250
return (Number) fieldsData;
253
/** Returns the precision step. */
254
public int getPrecisionStep() {
255
return precisionStep;
258
/** Returns the data type of the current value, {@code null} if not yet set.
261
public DataType getDataType() {
266
* Initializes the field with the supplied <code>long</code> value.
267
* @param value the numeric value
268
* @return this instance, because of this you can use it the following way:
269
* <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
271
public NumericField setLongValue(final long value) {
272
if (numericTS != null) numericTS.setLongValue(value);
273
fieldsData = Long.valueOf(value);
274
type = DataType.LONG;
279
* Initializes the field with the supplied <code>int</code> value.
280
* @param value the numeric value
281
* @return this instance, because of this you can use it the following way:
282
* <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
284
public NumericField setIntValue(final int value) {
285
if (numericTS != null) numericTS.setIntValue(value);
286
fieldsData = Integer.valueOf(value);
292
* Initializes the field with the supplied <code>double</code> value.
293
* @param value the numeric value
294
* @return this instance, because of this you can use it the following way:
295
* <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
297
public NumericField setDoubleValue(final double value) {
298
if (numericTS != null) numericTS.setDoubleValue(value);
299
fieldsData = Double.valueOf(value);
300
type = DataType.DOUBLE;
305
* Initializes the field with the supplied <code>float</code> value.
306
* @param value the numeric value
307
* @return this instance, because of this you can use it the following way:
308
* <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
310
public NumericField setFloatValue(final float value) {
311
if (numericTS != null) numericTS.setFloatValue(value);
312
fieldsData = Float.valueOf(value);
313
type = DataType.FLOAT;