1
package org.apache.lucene.util.packed;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.store.DataInput;
21
import org.apache.lucene.store.DataOutput;
22
import org.apache.lucene.util.CodecUtil;
23
import org.apache.lucene.util.Constants;
25
import java.io.IOException;
28
* Simplistic compression for array of unsigned long values.
29
* Each value is >= 0 and <= a specified maximum value. The
30
* values are stored as packed ints, with each value
31
* consuming a fixed number of bits.
36
public class PackedInts {
38
private final static String CODEC_NAME = "PackedInts";
39
private final static int VERSION_START = 0;
40
private final static int VERSION_CURRENT = VERSION_START;
43
* A read-only random access array of positive integers.
46
public static interface Reader {
48
* @param index the position of the wanted value.
49
* @return the value at the stated index.
54
* @return the number of bits used to store any given value.
55
* Note: This does not imply that memory usage is
56
* {@code bitsPerValue * #values} as implementations are free to
57
* use non-space-optimal packing of bits.
59
int getBitsPerValue();
62
* @return the number of values.
67
* Expert: if the bit-width of this reader matches one of
68
* java's native types, returns the underlying array
69
* (ie, byte[], short[], int[], long[]); else, returns
70
* null. Note that when accessing the array you must
71
* upgrade the type (bitwise AND with all ones), to
72
* interpret the full value as unsigned. Ie,
73
* bytes[idx]&0xFF, shorts[idx]&0xFFFF, etc.
78
* Returns true if this implementation is backed by a
87
* A packed integer array that can be modified.
90
public static interface Mutable extends Reader {
92
* Set the value at the given index in the array.
93
* @param index where the value should be positioned.
94
* @param value a value conforming to the constraints set by the array.
96
void set(int index, long value);
99
* Sets all values to 0.
106
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
109
public static abstract class ReaderImpl implements Reader {
110
protected final int bitsPerValue;
111
protected final int valueCount;
113
protected ReaderImpl(int valueCount, int bitsPerValue) {
114
this.bitsPerValue = bitsPerValue;
115
assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
116
this.valueCount = valueCount;
119
public int getBitsPerValue() {
127
public long getMaxValue() { // Convenience method
128
return maxValue(bitsPerValue);
131
public Object getArray() {
135
public boolean hasArray() {
140
/** A write-once Writer.
143
public static abstract class Writer {
144
protected final DataOutput out;
145
protected final int bitsPerValue;
146
protected final int valueCount;
148
protected Writer(DataOutput out, int valueCount, int bitsPerValue)
150
assert bitsPerValue <= 64;
153
this.valueCount = valueCount;
154
this.bitsPerValue = bitsPerValue;
155
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
156
out.writeVInt(bitsPerValue);
157
out.writeVInt(valueCount);
160
public abstract void add(long v) throws IOException;
161
public abstract void finish() throws IOException;
165
* Retrieve PackedInt data from the DataInput and return a packed int
166
* structure based on it.
167
* @param in positioned at the beginning of a stored packed int structure.
168
* @return a read only random access capable array of positive integers.
169
* @throws IOException if the structure could not be retrieved.
172
public static Reader getReader(DataInput in) throws IOException {
173
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
174
final int bitsPerValue = in.readVInt();
175
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
176
final int valueCount = in.readVInt();
178
switch (bitsPerValue) {
180
return new Direct8(in, valueCount);
182
return new Direct16(in, valueCount);
184
return new Direct32(in, valueCount);
186
return new Direct64(in, valueCount);
188
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
189
return new Packed64(in, valueCount, bitsPerValue);
191
return new Packed32(in, valueCount, bitsPerValue);
197
* Create a packed integer array with the given amount of values initialized
198
* to 0. the valueCount and the bitsPerValue cannot be changed after creation.
199
* All Mutables known by this factory are kept fully in RAM.
200
* @param valueCount the number of elements.
201
* @param bitsPerValue the number of bits available for any given value.
202
* @return a mutable packed integer array.
203
* @throws java.io.IOException if the Mutable could not be created. With the
204
* current implementations, this never happens, but the method
205
* signature allows for future persistence-backed Mutables.
208
public static Mutable getMutable(
209
int valueCount, int bitsPerValue) {
210
switch (bitsPerValue) {
212
return new Direct8(valueCount);
214
return new Direct16(valueCount);
216
return new Direct32(valueCount);
218
return new Direct64(valueCount);
220
if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) {
221
return new Packed64(valueCount, bitsPerValue);
223
return new Packed32(valueCount, bitsPerValue);
229
* Create a packed integer array writer for the given number of values at the
230
* given bits/value. Writers append to the given IndexOutput and has very
231
* low memory overhead.
232
* @param out the destination for the produced bits.
233
* @param valueCount the number of elements.
234
* @param bitsPerValue the number of bits available for any given value.
235
* @return a Writer ready for receiving values.
236
* @throws IOException if bits could not be written to out.
239
public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue)
241
return new PackedWriter(out, valueCount, bitsPerValue);
244
/** Returns how many bits are required to hold values up
245
* to and including maxValue
246
* @param maxValue the maximum value that should be representable.
247
* @return the amount of bits needed to represent values from 0 to maxValue.
250
public static int bitsRequired(long maxValue) {
251
// Very high long values does not translate well to double, so we do an
252
// explicit check for the edge cases
253
if (maxValue > 0x3FFFFFFFFFFFFFFFL) {
255
} if (maxValue > 0x1FFFFFFFFFFFFFFFL) {
258
return Math.max(1, (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0)));
262
* Calculates the maximum unsigned long that can be expressed with the given
264
* @param bitsPerValue the number of bits available for any given value.
265
* @return the maximum value for the given bits.
268
public static long maxValue(int bitsPerValue) {
269
return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
272
/** Rounds bitsPerValue up to 8, 16, 32 or 64. */
273
public static int getNextFixedSize(int bitsPerValue) {
274
if (bitsPerValue <= 8) {
276
} else if (bitsPerValue <= 16) {
278
} else if (bitsPerValue <= 32) {
285
/** Possibly wastes some storage in exchange for faster lookups */
286
public static int getRoundedFixedSize(int bitsPerValue) {
287
if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok
288
return getNextFixedSize(bitsPerValue);