package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
44
public static final String FIELD_1_TEXT = "field one text";
45
public static final String TEXT_FIELD_1_KEY = "textField1";
46
public static Field textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT,
47
Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
49
public static final String FIELD_2_TEXT = "field field field two text";
50
//Fields will be lexicographically sorted. So, the order is: field, text, two
51
public static final int [] FIELD_2_FREQS = {3, 1, 1};
52
public static final String TEXT_FIELD_2_KEY = "textField2";
53
public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
55
public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
56
public static final String TEXT_FIELD_3_KEY = "textField3";
57
public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
58
static { textField3.setOmitNorms(true); }
60
public static final String KEYWORD_TEXT = "Keyword";
61
public static final String KEYWORD_FIELD_KEY = "keyField";
62
public static Field keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT,
63
Field.Store.YES, Field.Index.NOT_ANALYZED);
65
public static final String NO_NORMS_TEXT = "omitNormsText";
66
public static final String NO_NORMS_KEY = "omitNorms";
67
public static Field noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT,
68
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
70
public static final String NO_TF_TEXT = "analyzed with no tf and positions";
71
public static final String NO_TF_KEY = "omitTermFreqAndPositions";
72
public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
73
Field.Store.YES, Field.Index.ANALYZED);
75
noTFField.setIndexOptions(IndexOptions.DOCS_ONLY);
78
public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
79
public static final String UNINDEXED_FIELD_KEY = "unIndField";
80
public static Field unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT,
81
Field.Store.YES, Field.Index.NO);
84
public static final String UNSTORED_1_FIELD_TEXT = "unstored field text";
85
public static final String UNSTORED_FIELD_1_KEY = "unStoredField1";
86
public static Field unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT,
87
Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
89
public static final String UNSTORED_2_FIELD_TEXT = "unstored field text";
90
public static final String UNSTORED_FIELD_2_KEY = "unStoredField2";
91
public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT,
92
Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
94
public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
95
public static byte [] LAZY_FIELD_BINARY_BYTES;
96
public static Field lazyFieldBinary;
98
public static final String LAZY_FIELD_KEY = "lazyField";
99
public static final String LAZY_FIELD_TEXT = "These are some field bytes";
100
public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
102
public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField";
103
public static String LARGE_LAZY_FIELD_TEXT;
104
public static Field largeLazyField;
107
public static final String FIELD_UTF1_TEXT = "field one \u4e00text";
108
public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
109
public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT,
110
Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
112
public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text";
113
//Fields will be lexicographically sorted. So, the order is: field, text, two
114
public static final int [] FIELD_UTF2_FREQS = {3, 1, 1};
115
public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
116
public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES,
117
Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
122
public static Map<String,Object> nameValues = null;
124
// ordered list of all the fields...
125
// could use LinkedHashMap for this purpose if Java1.4 is OK
126
public static Field[] fields = new Field[] {
139
lazyFieldBinary,//placeholder for binary field, since this is null. It must be second to last.
140
largeLazyField//placeholder for large field, since this is null. It must always be last
143
public static Map<String,Fieldable> all =new HashMap<String,Fieldable>();
144
public static Map<String,Fieldable> indexed =new HashMap<String,Fieldable>();
145
public static Map<String,Fieldable> stored =new HashMap<String,Fieldable>();
146
public static Map<String,Fieldable> unstored=new HashMap<String,Fieldable>();
147
public static Map<String,Fieldable> unindexed=new HashMap<String,Fieldable>();
148
public static Map<String,Fieldable> termvector=new HashMap<String,Fieldable>();
149
public static Map<String,Fieldable> notermvector=new HashMap<String,Fieldable>();
150
public static Map<String,Fieldable> lazy= new HashMap<String,Fieldable>();
151
public static Map<String,Fieldable> noNorms=new HashMap<String,Fieldable>();
152
public static Map<String,Fieldable> noTf=new HashMap<String,Fieldable>();
155
//Initialize the large Lazy Field
156
StringBuilder buffer = new StringBuilder();
157
for (int i = 0; i < 10000; i++)
159
buffer.append("Lazily loading lengths of language in lieu of laughing ");
163
LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8");
164
} catch (UnsupportedEncodingException e) {
166
lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
167
fields[fields.length - 2] = lazyFieldBinary;
168
LARGE_LAZY_FIELD_TEXT = buffer.toString();
169
largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
170
fields[fields.length - 1] = largeLazyField;
171
for (int i=0; i<fields.length; i++) {
172
Fieldable f = fields[i];
174
if (f.isIndexed()) add(indexed,f);
175
else add(unindexed,f);
176
if (f.isTermVectorStored()) add(termvector,f);
177
if (f.isIndexed() && !f.isTermVectorStored()) add(notermvector,f);
178
if (f.isStored()) add(stored,f);
179
else add(unstored,f);
180
if (f.getOmitNorms()) add(noNorms,f);
181
if (f.getIndexOptions() == IndexOptions.DOCS_ONLY) add(noTf,f);
182
if (f.isLazy()) add(lazy, f);
187
private static void add(Map<String,Fieldable> map, Fieldable field) {
188
map.put(field.name(), field);
194
nameValues = new HashMap<String,Object>();
195
nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
196
nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
197
nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
198
nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
199
nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
200
nameValues.put(NO_TF_KEY, NO_TF_TEXT);
201
nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
202
nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
203
nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
204
nameValues.put(LAZY_FIELD_KEY, LAZY_FIELD_TEXT);
205
nameValues.put(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
206
nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT);
207
nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT);
208
nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT);
212
* Adds the fields above to a document
213
* @param doc The document to write
215
public static void setupDoc(Document doc) {
216
for (int i=0; i<fields.length; i++) {
222
* Writes the document to the directory using a segment
223
* named "test"; returns the SegmentInfo describing the new
227
* @throws IOException
229
public static SegmentInfo writeDoc(Random random, Directory dir, Document doc) throws IOException
231
return writeDoc(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), null, doc);
235
* Writes the document to the directory using the analyzer
236
* and the similarity score; returns the SegmentInfo
237
* describing the new segment
242
* @throws IOException
244
public static SegmentInfo writeDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
245
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */
246
TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
247
//writer.setUseCompoundFile(false);
248
writer.addDocument(doc);
250
SegmentInfo info = writer.newestSegment();
255
public static int numFields(Document doc) {
256
return doc.getFields().size();
259
public static Document createDocument(int n, String indexName, int numFields) {
260
StringBuilder sb = new StringBuilder();
261
Document doc = new Document();
262
doc.add(new Field("id", Integer.toString(n), Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
263
doc.add(new Field("indexname", indexName, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
266
doc.add(new Field("field1", sb.toString(), Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
269
for (int i = 1; i < numFields; i++) {
270
doc.add(new Field("field" + (i + 1), sb.toString(), Store.YES,
271
Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));