1
package org.apache.lucene.index;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import org.apache.lucene.util.LuceneTestCase;
21
import org.apache.lucene.store.IndexInput;
22
import org.apache.lucene.store.IndexOutput;
23
import org.apache.lucene.store.RAMDirectory;
25
import java.io.IOException;
27
public class TestIndexInput extends LuceneTestCase {
29
static final byte[] READ_TEST_BYTES = new byte[] {
32
(byte) 0x80, (byte) 0x80, 0x01,
33
(byte) 0x81, (byte) 0x80, 0x01,
34
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
35
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07,
36
(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x7F,
37
0x06, 'L', 'u', 'c', 'e', 'n', 'e',
39
// 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK")
40
0x02, (byte) 0xC2, (byte) 0xBF,
41
0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF,
42
'c', 'e', (byte) 0xC2, (byte) 0xBF,
45
// 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES")
46
0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
47
0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
48
'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0,
52
// (U+1D11E "MUSICAL SYMBOL G CLEF")
53
// (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE")
54
0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
55
0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
56
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
58
(byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E,
60
(byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0,
65
0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e',
68
private void checkReads(IndexInput is) throws IOException {
69
assertEquals(128,is.readVInt());
70
assertEquals(16383,is.readVInt());
71
assertEquals(16384,is.readVInt());
72
assertEquals(16385,is.readVInt());
73
assertEquals(Integer.MAX_VALUE, is.readVInt());
74
assertEquals((long) Integer.MAX_VALUE, is.readVLong());
75
assertEquals(Long.MAX_VALUE, is.readVLong());
76
assertEquals("Lucene",is.readString());
78
assertEquals("\u00BF",is.readString());
79
assertEquals("Lu\u00BFce\u00BFne",is.readString());
81
assertEquals("\u2620",is.readString());
82
assertEquals("Lu\u2620ce\u2620ne",is.readString());
84
assertEquals("\uD834\uDD1E",is.readString());
85
assertEquals("\uD834\uDD1E\uD834\uDD60",is.readString());
86
assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne",is.readString());
88
assertEquals("\u0000",is.readString());
89
assertEquals("Lu\u0000ce\u0000ne",is.readString());
92
// this test only checks BufferedIndexInput because MockIndexInput extends BufferedIndexInput
93
public void testBufferedIndexInputRead() throws IOException {
94
final IndexInput is = new MockIndexInput(READ_TEST_BYTES);
99
// this test checks the raw IndexInput methods as it uses RAMIndexInput which extends IndexInput directly
100
public void testRawIndexInputRead() throws IOException {
101
final RAMDirectory dir = new RAMDirectory();
102
final IndexOutput os = dir.createOutput("foo");
103
os.writeBytes(READ_TEST_BYTES, READ_TEST_BYTES.length);
105
final IndexInput is = dir.openInput("foo");
114
* @throws IOException
116
public void testSkipChars() throws IOException {
117
byte[] bytes = new byte[]{(byte) 0x80, 0x01,
119
(byte) 0x80, (byte) 0x80, 0x01,
120
(byte) 0x81, (byte) 0x80, 0x01,
121
0x06, 'L', 'u', 'c', 'e', 'n', 'e',
123
String utf8Str = "\u0634\u1ea1";
124
byte [] utf8Bytes = utf8Str.getBytes("UTF-8");
125
byte [] theBytes = new byte[bytes.length + 1 + utf8Bytes.length];
126
System.arraycopy(bytes, 0, theBytes, 0, bytes.length);
127
theBytes[bytes.length] = (byte)utf8Str.length();//Add in the number of chars we are storing, which should fit in a byte for this test
128
System.arraycopy(utf8Bytes, 0, theBytes, bytes.length + 1, utf8Bytes.length);
129
IndexInput is = new MockIndexInput(theBytes);
130
assertEquals(128, is.readVInt());
131
assertEquals(16383, is.readVInt());
132
assertEquals(16384, is.readVInt());
133
assertEquals(16385, is.readVInt());
134
int charsToRead = is.readVInt();//number of chars in the Lucene string
135
assertTrue(0x06 + " does not equal: " + charsToRead, 0x06 == charsToRead);
137
char [] chars = new char[3];//there should be 6 chars remaining
138
is.readChars(chars, 0, 3);
139
String tmpStr = new String(chars);
140
assertTrue(tmpStr + " is not equal to " + "ene", tmpStr.equals("ene" ) == true);
141
//Now read the UTF8 stuff
142
charsToRead = is.readVInt() - 1;//since we are skipping one
144
assertTrue(utf8Str.length() - 1 + " does not equal: " + charsToRead, utf8Str.length() - 1 == charsToRead);
145
chars = new char[charsToRead];
146
is.readChars(chars, 0, charsToRead);
147
tmpStr = new String(chars);
148
assertTrue(tmpStr + " is not equal to " + utf8Str.substring(1), tmpStr.equals(utf8Str.substring(1)) == true);