1
package org.apache.lucene.analysis;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.util.AbstractSet;
21
import java.util.Collection;
22
import java.util.Iterator;
25
import org.apache.lucene.util.Version;
28
* A simple class that stores Strings as char[]'s in a
29
* hash table. Note that this is not a general purpose
30
* class. For example, it cannot remove items from the
31
* set, nor does it resize its hash table to be smaller,
32
* etc. It is designed to be quick to test if a char[]
33
* is in the set without the necessity of converting it
* to a String first.
35
* <p>You must specify the required {@link Version}
36
* compatibility when creating {@link CharArraySet}:
38
* <li> As of 3.1, supplementary characters are
39
* properly lowercased.</li>
41
* Before 3.1 supplementary characters could not be
42
* lowercased correctly due to the lack of Unicode 4
43
* support in JDK 1.4. To use instances of
44
* {@link CharArraySet} with the behavior before Lucene
45
* 3.1 pass a {@link Version} &lt; 3.1 to the constructors.
47
* <em>Please note:</em> This class implements {@link java.util.Set Set} but
48
* does not behave like it should in all cases. The generic type is
49
* {@code Set<Object>}, because you can add any object to it,
50
* that has a string representation. The add methods will use
51
* {@link Object#toString} and store the result using a {@code char[]}
52
* buffer. The {@code contains()} methods behave the same way.
53
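* {@link #iterator()} returns newly allocated {@code String} instances only
* for a {@link Version} of 3.0 or older; as of 3.1 it returns {@code char[]}
* instances.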
54
* For type safety also {@link #stringIterator()} is provided.
56
public class CharArraySet extends AbstractSet<Object> {
57
public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.<Object>emptyMap());
58
private static final Object PLACEHOLDER = new Object();
60
private final CharArrayMap<Object> map;
63
/**
 * Creates a set with enough capacity to hold {@code startSize} terms.
 *
 * @param matchVersion
 *          compatibility match version; see the <a href="#version">Version
 *          note</a> above for details.
 * @param startSize
 *          the initial capacity
 * @param ignoreCase
 *          <code>false</code> if and only if the set should be case sensitive,
 *          otherwise <code>true</code>.
 */
public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
  // The set is a thin wrapper: all storage lives in a freshly created CharArrayMap.
  this(new CharArrayMap<Object>(matchVersion, startSize, ignoreCase));
}
79
/**
 * Creates a set from a Collection of objects.
 *
 * @param matchVersion
 *          compatibility match version; see the <a href="#version">Version
 *          note</a> above for details.
 * @param c
 *          a collection whose elements are to be placed into the set
 * @param ignoreCase
 *          <code>false</code> if and only if the set should be case sensitive,
 *          otherwise <code>true</code>.
 * @throws NullPointerException
 *           if {@code c} is <code>null</code> (raised by {@code c.size()}).
 */
public CharArraySet(Version matchVersion, Collection<?> c, boolean ignoreCase) {
  // Size the backing map for the incoming elements first...
  this(matchVersion, c.size(), ignoreCase);
  // ...then actually insert them; sizing alone would leave the set empty.
  addAll(c);
}
96
/**
 * Creates a set with enough capacity to hold {@code startSize} terms, using
 * {@link Version#LUCENE_30} compatibility.
 *
 * @param startSize
 *          the initial capacity
 * @param ignoreCase
 *          <code>false</code> if and only if the set should be case sensitive,
 *          otherwise <code>true</code>.
 * @deprecated use {@link #CharArraySet(Version, int, boolean)} instead
 */
@Deprecated
public CharArraySet(int startSize, boolean ignoreCase) {
  this(Version.LUCENE_30, startSize, ignoreCase);
}
111
/**
 * Creates a set from a Collection of objects, using
 * {@link Version#LUCENE_30} compatibility.
 *
 * @param c
 *          a collection whose elements are to be placed into the set
 * @param ignoreCase
 *          <code>false</code> if and only if the set should be case sensitive,
 *          otherwise <code>true</code>.
 * @throws NullPointerException
 *           if {@code c} is <code>null</code> (raised by {@code c.size()}).
 * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead
 */
@Deprecated
public CharArraySet(Collection<?> c, boolean ignoreCase) {
  // Size the backing map for the incoming elements first...
  this(Version.LUCENE_30, c.size(), ignoreCase);
  // ...then actually insert them; sizing alone would leave the set empty.
  addAll(c);
}
126
/**
 * Creates a set backed by the specified map (internal only); also used by
 * {@link CharArrayMap#keySet()}.
 *
 * @param map
 *          the map whose keys form this set; it is used directly, not copied
 */
CharArraySet(final CharArrayMap<Object> map) {
  // The backing field is final, so it has to be assigned here.
  this.map = map;
}
131
/** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
133
public void clear() {
137
/** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
139
public boolean contains(char[] text, int off, int len) {
140
return map.containsKey(text, off, len);
143
/** true if the <code>CharSequence</code> is in the set */
144
public boolean contains(CharSequence cs) {
145
return map.containsKey(cs);
149
public boolean contains(Object o) {
150
return map.containsKey(o);
154
public boolean add(Object o) {
155
return map.put(o, PLACEHOLDER) == null;
158
/** Add this CharSequence into the set */
159
public boolean add(CharSequence text) {
160
return map.put(text, PLACEHOLDER) == null;
163
/** Add this String into the set */
164
public boolean add(String text) {
165
return map.put(text, PLACEHOLDER) == null;
168
/** Add this char[] directly to the set.
169
* If ignoreCase is true for this Set, the text array will be directly modified.
170
* The user should never modify this text array after calling this method.
172
public boolean add(char[] text) {
173
return map.put(text, PLACEHOLDER) == null;
182
* Returns an unmodifiable {@link CharArraySet}. This allows to provide
183
* unmodifiable views of internal sets for "read-only" use.
186
* a set for which the unmodifiable set is returned.
187
* @return an new unmodifiable {@link CharArraySet}.
188
* @throws NullPointerException
189
* if the given set is <code>null</code>.
191
public static CharArraySet unmodifiableSet(CharArraySet set) {
193
throw new NullPointerException("Given set is null");
194
if (set == EMPTY_SET)
196
if (set.map instanceof CharArrayMap.UnmodifiableCharArrayMap)
198
return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
202
* Returns a copy of the given set as a {@link CharArraySet}. If the given set
203
* is a {@link CharArraySet} the ignoreCase property will be preserved.
207
* @return a copy of the given set as a {@link CharArraySet}. If the given set
208
* is a {@link CharArraySet} the ignoreCase and matchVersion property will be
210
* @deprecated use {@link #copy(Version, Set)} instead.
213
public static CharArraySet copy(final Set<?> set) {
216
return copy(Version.LUCENE_30, set);
220
* Returns a copy of the given set as a {@link CharArraySet}. If the given set
221
* is a {@link CharArraySet} the ignoreCase property will be preserved.
223
* <b>Note:</b> If you intend to create a copy of another {@link CharArraySet} where
224
* the {@link Version} of the source set differs from its copy
225
* {@link #CharArraySet(Version, Collection, boolean)} should be used instead.
226
* The {@link #copy(Version, Set)} will preserve the {@link Version} of the
227
* source set it is an instance of {@link CharArraySet}.
230
* @param matchVersion
231
* compatibility match version see <a href="#version">Version
232
* note</a> above for details. This argument will be ignored if the
233
* given set is a {@link CharArraySet}.
236
* @return a copy of the given set as a {@link CharArraySet}. If the given set
237
* is a {@link CharArraySet} the ignoreCase property as well as the
238
* matchVersion will be of the given set will be preserved.
240
public static CharArraySet copy(final Version matchVersion, final Set<?> set) {
243
if(set instanceof CharArraySet) {
244
final CharArraySet source = (CharArraySet) set;
245
return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map));
247
return new CharArraySet(matchVersion, set, false);
250
/** The Iterator<String> for this set. Strings are constructed on the fly, so
251
* use <code>nextCharArray</code> for more efficient access.
252
* @deprecated Use the standard iterator, which returns {@code char[]} instances.
255
public class CharArraySetIterator implements Iterator<String> {
258
private CharArraySetIterator() {
262
private void goNext() {
265
while (pos < map.keys.length && (next=map.keys[pos]) == null) pos++;
268
public boolean hasNext() {
272
/** do not modify the returned char[] */
273
public char[] nextCharArray() {
279
/** Returns the next String, as a Set<String> would...
280
* use nextCharArray() for better efficiency. */
281
public String next() {
282
return new String(nextCharArray());
285
public void remove() {
286
throw new UnsupportedOperationException();
290
/** returns an iterator of new allocated Strings (an instance of {@link CharArraySetIterator}).
291
* @deprecated Use {@link #iterator}, which returns {@code char[]} instances.
294
public Iterator<String> stringIterator() {
295
return new CharArraySetIterator();
298
/** Returns an {@link Iterator} depending on the version used:
300
* <li>if {@code matchVersion} ≥ 3.1, it returns {@code char[]} instances in this set.</li>
301
* <li>if {@code matchVersion} is 3.0 or older, it returns new
302
* allocated Strings, so this method violates the Set interface.
303
* It is kept this way for backwards compatibility, normally it should
304
* return {@code char[]} on {@code next()}</li>
307
@Override @SuppressWarnings("unchecked")
308
public Iterator<Object> iterator() {
309
// use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
310
return map.matchVersion.onOrAfter(Version.LUCENE_31) ?
311
map.originalKeySet().iterator() : (Iterator) stringIterator();
315
public String toString() {
316
final StringBuilder sb = new StringBuilder("[");
317
for (Object item : this) {
318
if (sb.length()>1) sb.append(", ");
319
if (item instanceof char[]) {
320
sb.append((char[]) item);
325
return sb.append(']').toString();