1
package org.apache.lucene.analysis.tokenattributes;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
20
import java.io.Serializable;
21
import java.nio.CharBuffer;
23
import org.apache.lucene.util.ArrayUtil;
24
import org.apache.lucene.util.AttributeImpl;
25
import org.apache.lucene.util.AttributeReflector;
26
import org.apache.lucene.util.RamUsageEstimator;
29
* The term text of a Token.
31
public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermAttribute, Cloneable, Serializable {
32
private static int MIN_BUFFER_SIZE = 10;
34
private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
35
private int termLength = 0;
38
public String term() {
39
// don't delegate to toString() here!
40
return new String(termBuffer, 0, termLength);
43
// Makes sure the term buffer can hold `length` chars, then copies `length` chars of
// `buffer`, starting at `offset`, to the beginning of the term buffer.
// growTermBuffer does not preserve old content, which is fine because it is overwritten here.
// NOTE(review): the remainder of this method is not visible in this view.
public final void copyBuffer(char[] buffer, int offset, int length) {
44
growTermBuffer(length);
45
System.arraycopy(buffer, offset, termBuffer, 0, length);
50
public void setTermBuffer(char[] buffer, int offset, int length) {
51
copyBuffer(buffer, offset, length);
55
// Makes sure the term buffer can hold the whole string, then copies all of its chars
// to the beginning of the term buffer.
// NOTE(review): the remainder of this method is not visible in this view.
public void setTermBuffer(String buffer) {
56
int length = buffer.length();
57
growTermBuffer(length);
58
buffer.getChars(0, length, termBuffer, 0);
63
// Copies the chars [offset, offset + length) of `buffer` to the beginning of the term
// buffer, after making sure the term buffer can hold `length` chars.
// The range is only validated by assertions, i.e. only when the JVM runs with -ea;
// otherwise an invalid range surfaces as whatever String.getChars throws.
// NOTE(review): the remainder of this method is not visible in this view.
public void setTermBuffer(String buffer, int offset, int length) {
64
assert offset <= buffer.length();
65
assert offset + length <= buffer.length();
66
growTermBuffer(length);
67
buffer.getChars(offset, offset + length, termBuffer, 0);
71
public final char[] buffer() {
76
public char[] termBuffer() {
80
// Ensures the term buffer has room for at least `newSize` chars. Unlike growTermBuffer,
// the existing content is carried over into the newly allocated array.
// NOTE(review): the return statement is not visible in this view.
public final char[] resizeBuffer(int newSize) {
81
if(termBuffer.length < newSize){
82
// Not big enough; create a new array with slight
83
// over allocation and preserve content
84
final char[] newCharBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
85
System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
86
termBuffer = newCharBuffer;
92
public char[] resizeTermBuffer(int newSize) {
93
return resizeBuffer(newSize);
96
private void growTermBuffer(int newSize) {
97
if(termBuffer.length < newSize){
98
// Not big enough; create a new array with slight
100
termBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
105
public int termLength() {
109
// Rejects a `length` larger than the current buffer capacity with an
// IllegalArgumentException whose message reports both values.
// NOTE(review): no lower-bound check is visible here; confirm how negative lengths are handled.
// NOTE(review): the remainder of this method is not visible in this view.
public final CharTermAttribute setLength(int length) {
110
if (length > termBuffer.length)
111
throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
116
public final CharTermAttribute setEmpty() {
122
public void setTermLength(int length) {
126
// *** CharSequence interface ***
127
public final int length() {
131
public final char charAt(int index) {
132
if (index >= termLength)
133
throw new IndexOutOfBoundsException();
134
return termBuffer[index];
137
public final CharSequence subSequence(final int start, final int end) {
138
if (start > termLength || end > termLength)
139
throw new IndexOutOfBoundsException();
140
return new String(termBuffer, start, end - start);
143
// *** Appendable interface ***
145
// Appends a whole CharSequence by delegating to append(csq, 0, csq.length()).
// NOTE(review): the statement guarded by the null check is not visible in this view, so the
// lines below do not read as the real control flow; presumably null is routed to
// appendNull() — confirm against the full file.
public final CharTermAttribute append(CharSequence csq) {
146
if (csq == null) // needed for Appendable compliance
148
return append(csq, 0, csq.length());
151
// Appends the chars [start, end) of `csq` after the current term text.
// Throws IndexOutOfBoundsException when end < start or when start/end exceed csq.length().
// For more than 4 chars a bulk copy is chosen based on the runtime type of `csq`;
// otherwise chars are copied one at a time through charAt.
// NOTE(review): several lines of this method (the null-guard's statement, loop headers,
// the termLength update after bulk copies, closing braces, the return) are not visible
// in this view, so the code below does not read as the real control flow.
// NOTE(review): the visible range check does not reject a negative `start` — confirm.
public final CharTermAttribute append(CharSequence csq, int start, int end) {
152
if (csq == null) // needed for Appendable compliance
154
final int len = end - start, csqlen = csq.length();
155
if (len < 0 || start > csqlen || end > csqlen)
156
throw new IndexOutOfBoundsException();
159
// Make room for the appended chars while keeping the existing term text.
resizeBuffer(termLength + len);
160
if (len > 4) { // only use instanceof check series for longer CSQs, else simply iterate
161
if (csq instanceof String) {
162
((String) csq).getChars(start, end, termBuffer, termLength);
163
} else if (csq instanceof StringBuilder) {
164
((StringBuilder) csq).getChars(start, end, termBuffer, termLength);
165
} else if (csq instanceof CharTermAttribute) {
166
System.arraycopy(((CharTermAttribute) csq).buffer(), start, termBuffer, termLength, len);
167
} else if (csq instanceof CharBuffer && ((CharBuffer) csq).hasArray()) {
168
final CharBuffer cb = (CharBuffer) csq;
169
// Index into the backing array: array offset + buffer position + requested start.
System.arraycopy(cb.array(), cb.arrayOffset() + cb.position() + start, termBuffer, termLength, len);
170
} else if (csq instanceof StringBuffer) {
171
((StringBuffer) csq).getChars(start, end, termBuffer, termLength);
174
termBuffer[termLength++] = csq.charAt(start++);
175
// no fall-through here, as termLength is updated!
182
termBuffer[termLength++] = csq.charAt(start++);
187
// Appends a single char: resizeBuffer makes room for one more char (keeping the existing
// text), the char is stored at the old term length, and termLength is incremented.
// NOTE(review): the return statement is not visible in this view.
public final CharTermAttribute append(char c) {
188
resizeBuffer(termLength + 1)[termLength++] = c;
192
// *** For performance some convenience methods in addition to CSQ's ***
194
// Appends all chars of `s` after the current term text; resizeBuffer makes room while
// keeping the existing content, and getChars writes starting at index termLength.
// NOTE(review): the null-guard's statement, the termLength update and the return are not
// visible in this view.
public final CharTermAttribute append(String s) {
195
if (s == null) // needed for Appendable compliance
197
final int len = s.length();
198
s.getChars(0, len, resizeBuffer(termLength + len), termLength);
203
// Appends all chars of `s` after the current term text; resizeBuffer makes room while
// keeping the existing content, and getChars writes starting at index termLength.
// NOTE(review): the null-guard's statement, the termLength update and the return are not
// visible in this view.
public final CharTermAttribute append(StringBuilder s) {
204
if (s == null) // needed for Appendable compliance
206
final int len = s.length();
207
s.getChars(0, len, resizeBuffer(termLength + len), termLength);
212
// Appends the first ta.length() chars of ta.buffer() after the current term text;
// resizeBuffer makes room while keeping the existing content.
// NOTE(review): the null-guard's statement, the termLength update and the return are not
// visible in this view.
public final CharTermAttribute append(CharTermAttribute ta) {
213
if (ta == null) // needed for Appendable compliance
215
final int len = ta.length();
216
System.arraycopy(ta.buffer(), 0, resizeBuffer(termLength + len), termLength, len);
221
// Appends the four chars 'n','u','l','l' after the current term text, advancing
// termLength by one for each char written.
// NOTE(review): the return statement is not visible in this view.
private CharTermAttribute appendNull() {
222
// Make room for four more chars while keeping the existing term text.
resizeBuffer(termLength + 4);
223
termBuffer[termLength++] = 'n';
224
termBuffer[termLength++] = 'u';
225
termBuffer[termLength++] = 'l';
226
termBuffer[termLength++] = 'l';
230
// *** AttributeImpl ***
233
// Hash seeded with the term length and combined (x31) with ArrayUtil's hash over the
// first termLength chars, so unused buffer capacity does not influence the value.
// NOTE(review): the return statement is not visible in this view.
public int hashCode() {
234
int code = termLength;
235
code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
240
public void clear() {
245
// Clones via super.clone(), then gives the copy its own array sized exactly to the
// current term length and fills it with the term chars, so the two instances do not
// share a buffer.
// NOTE(review): the return statement is not visible in this view.
public Object clone() {
246
CharTermAttributeImpl t = (CharTermAttributeImpl)super.clone();
248
t.termBuffer = new char[this.termLength];
249
System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength);
254
// For another CharTermAttributeImpl, compares the term lengths and then the first
// termLength chars of both buffers one by one.
// NOTE(review): the statements taken on each comparison outcome (and any checks before
// the instanceof test) are not visible in this view.
public boolean equals(Object other) {
259
if (other instanceof CharTermAttributeImpl) {
260
final CharTermAttributeImpl o = ((CharTermAttributeImpl) other);
261
if (termLength != o.termLength)
263
for(int i=0;i<termLength;i++) {
264
if (termBuffer[i] != o.termBuffer[i]) {
275
* Returns solely the term text as specified by the
276
* {@link CharSequence} interface.
277
* <p>The behavior of this method changed in Lucene 3.1:
278
* previously it returned a String representation of the whole
279
* term with all attributes.
280
* This affects especially the
281
* {@link org.apache.lucene.analysis.Token} subclass.
284
public String toString() {
285
// CharSequence requires that only the contents are returned, but this is orginal code: "term=" + new String(termBuffer, 0, termLength)
286
return new String(termBuffer, 0, termLength);
290
public void reflectWith(AttributeReflector reflector) {
291
reflector.reflect(CharTermAttribute.class, "term", toString());
295
public void copyTo(AttributeImpl target) {
296
if (target instanceof CharTermAttribute) {
297
CharTermAttribute t = (CharTermAttribute) target;
298
t.copyBuffer(termBuffer, 0, termLength);
300
TermAttribute t = (TermAttribute) target;
301
t.setTermBuffer(termBuffer, 0, termLength);