1
package org.apache.lucene.util;
3
import java.io.EOFException;
4
import java.io.IOException;
5
import java.io.InputStream;
6
import java.io.OutputStream;
9
* Licensed to the Apache Software Foundation (ASF) under one or more
10
* contributor license agreements. See the NOTICE file distributed with
11
* this work for additional information regarding copyright ownership.
12
* The ASF licenses this file to You under the Apache License, Version 2.0
13
* (the "License"); you may not use this file except in compliance with
14
* the License. You may obtain a copy of the License at
16
* http://www.apache.org/licenses/LICENSE-2.0
18
* Unless required by applicable law or agreed to in writing, software
19
* distributed under the License is distributed on an "AS IS" BASIS,
20
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
* See the License for the specific language governing permissions and
22
* limitations under the License.
26
* Variable-length encoding of 32-bit integers, into 8-bit bytes. A number is encoded as follows:
28
* <li>If it is less than 127 and non-negative (i.e., if the number uses only 7 bits), it is encoded as
29
* as single byte: 0bbbbbbb.
30
* <li>If its highest nonzero bit is greater than bit 6 (0x40), it is represented as a series of
32
* 7 LSB containing bits from the original value, with the MSB set for all but the last
33
* byte. The first encoded byte contains the highest nonzero bits from the
34
* original; the second byte contains the next 7 MSB; and so on, with the last byte
35
* containing the 7 LSB of the original.
39
* <li>n = 117 = 1110101: This has fewer than 8 significant bits, and so is encoded as
41
* <li>n = 100000 = (binary) 11000011010100000. This has 17 significant bits, and so needs
42
* three Vint8 bytes. Left-zero-pad it to a multiple of 7 bits, then split it into chunks of 7
43
* and add an MSB, 0 for the last byte, 1 for the others: 1|0000110 1|0001101 0|0100000
46
* This encoder/decoder will correctly handle any 32-bit integer, but for negative numbers,
47
* and positive numbers with more than 28 significant bits, encoding requires 5 bytes; this
48
* is not an efficient encoding scheme for large
49
* positive numbers or any negative number.
51
* <b>Compatibility:</b><br>
52
* This class has been used in products that have shipped to customers, and is needed to
53
* decode legacy data. Do not modify this class in ways that will break compatibility.
55
* @lucene.experimental
60
* Because Java lacks call-by-reference, this class boxes the decoding position, which
61
* is initially set by the caller, and returned after decoding, incremented by the number
64
public static class Position {
66
* Creates a position value set to zero.
69
// The initial position is zero by default.
72
* Creates a position set to {@code initialPosition}.
73
* @param initialPosition The starting decoding position in the source buffer.
75
public Position(int initialPosition) {
76
this.pos = initialPosition;
79
* The value passed by reference.
85
* Returns the number of bytes needed to encode {@code number}.
86
* @param number The number whose encoded length is needed.
87
* @return The number of bytes needed to encode {@code number}.
89
public static int bytesNeeded(int number) {
90
if ((number & ~0x7F) == 0) {
92
} else if ((number & ~0x3FFF) == 0) {
94
} else if ((number & ~0x1FFFFF) == 0) {
96
} else if ((number & ~0xFFFFFFF) == 0) {
104
* The maximum number of bytes needed to encode a number using {@code Vint8}.
106
public static final int MAXIMUM_BYTES_NEEDED = 5;
109
* Encodes {@code number} to {@code out}.
110
* @param number The value to be written in encoded form, to {@code out}.
111
* @param out The output stream receiving the encoded bytes.
112
* @exception IOException If there is a problem writing to {@code out}.
114
public static void encode(int number, OutputStream out) throws IOException {
115
if ((number & ~0x7F) == 0) {
117
} else if ((number & ~0x3FFF) == 0) {
118
out.write(0x80 | (number >> 7));
119
out.write(0x7F & number);
120
} else if ((number & ~0x1FFFFF) == 0) {
121
out.write(0x80 | (number >> 14));
122
out.write(0x80 | (number >> 7));
123
out.write(0x7F & number);
124
} else if ((number & ~0xFFFFFFF) == 0) {
125
out.write(0x80 | (number >> 21));
126
out.write(0x80 | (number >> 14));
127
out.write(0x80 | (number >> 7));
128
out.write(0x7F & number);
130
out.write(0x80 | (number >> 28));
131
out.write(0x80 | (number >> 21));
132
out.write(0x80 | (number >> 14));
133
out.write(0x80 | (number >> 7));
134
out.write(0x7F & number);
139
* Encodes {@code number} into {@code dest}, starting at offset {@code start} from
140
* the beginning of the array. This method assumes {@code dest} is large enough to
141
* hold the required number of bytes.
142
* @param number The number to be encoded.
143
* @param dest The destination array.
144
* @param start The starting offset in the array.
145
* @return The number of bytes used in the array.
147
public static int encode(int number, byte[] dest, int start) {
148
if ((number & ~0x7F) == 0) {
149
dest[start] = (byte) number;
151
} else if ((number & ~0x3FFF) == 0) {
152
dest[start] = (byte) (0x80 | ((number & 0x3F80) >> 7));
153
dest[start + 1] = (byte) (number & 0x7F);
155
} else if ((number & ~0x1FFFFF) == 0) {
156
dest[start] = (byte) (0x80 | ((number & 0x1FC000) >> 14));
157
dest[start + 1] = (byte) (0x80 | ((number & 0x3F80) >> 7));
158
dest[start + 2] = (byte) (number & 0x7F);
160
} else if ((number & ~0xFFFFFFF) == 0) {
161
dest[start] = (byte) (0x80 | ((number & 0xFE00000) >> 21));
162
dest[start + 1] = (byte) (0x80 | ((number & 0x1FC000) >> 14));
163
dest[start + 2] = (byte) (0x80 | ((number & 0x3F80) >> 7));
164
dest[start + 3] = (byte) (number & 0x7F);
167
dest[start] = (byte) (0x80 | ((number & 0xF0000000) >> 28));
168
dest[start + 1] = (byte) (0x80 | ((number & 0xFE00000) >> 21));
169
dest[start + 2] = (byte) (0x80 | ((number & 0x1FC000) >> 14));
170
dest[start + 3] = (byte) (0x80 | ((number & 0x3F80) >> 7));
171
dest[start + 4] = (byte) (number & 0x7F);
177
* Decodes a 32-bit integer from {@code bytes}, beginning at offset {@code pos.pos}.
178
* The decoded value is returned, and {@code pos.pos} is incremented by the number of
180
* @param bytes The byte array containing an encoded value.
181
* @param pos On entry, the starting position in the array; on return, one greater
182
* than the position of the last byte decoded in the call.
183
* @return The decoded value.
185
public static int decode(byte[] bytes, Position pos) {
188
byte first = bytes[pos.pos];
190
value |= first & 0x7F;
191
if ((first & 0x80) == 0) {
199
* Decodes a 32-bit integer from bytes read from {@code in}. Bytes are read,
200
* one at a time, from {@code in}, and it is assumed they represent a 32-bit
201
* integer encoded using this class's encoding scheme. The decoded value is
203
* @param in The input stream containing the encoded bytes.
204
* @return The decoded value.
205
* @exception EOFException If the stream ends before a value has been decoded.
207
public static int decode(InputStream in) throws IOException {
210
int first = in.read();
212
throw new EOFException();
214
value |= first & 0x7F;
215
if ((first & 0x80) == 0) {
223
* The default ctor is made private because all methods of this class are static.
226
// Just making it impossible to instantiate.