2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
18
package org.apache.solr.analysis;
20
import org.apache.lucene.analysis.Token;
21
import org.apache.lucene.analysis.TokenStream;
22
import org.apache.lucene.analysis.Tokenizer;
23
import org.apache.lucene.analysis.WhitespaceTokenizer;
24
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
26
import java.io.IOException;
27
import java.io.StringReader;
30
/** Test that BufferedTokenStream behaves as advertised in subclasses. */
32
public class TestBufferedTokenStream extends BaseTokenTestCase {
34
/** Example of a class implementing the rule "A" "B" => "Q" "B" */
35
public static class AB_Q_Stream extends BufferedTokenStream {
36
public AB_Q_Stream(TokenStream input) {super(input);}
38
protected Token process(Token t) throws IOException {
39
if ("A".equals(new String(t.buffer(), 0, t.length()))) {
41
if (t2!=null && "B".equals(new String(t2.buffer(), 0, t2.length()))) t.setEmpty().append("Q");
42
if (t2!=null) pushBack(t2);
48
/** Example of a class implementing "A" "B" => "A" "A" "B" */
49
public static class AB_AAB_Stream extends BufferedTokenStream {
50
public AB_AAB_Stream(TokenStream input) {super(input);}
52
protected Token process(Token t) throws IOException {
53
if ("A".equals(new String(t.buffer(), 0, t.length())) &&
54
"B".equals(new String(peek(1).buffer(), 0, peek(1).length())))
55
write((Token)t.clone());
60
public void testABQ() throws Exception {
61
final String input = "How now A B brown A cow B like A B thing?";
62
final String expected = "How now Q B brown A cow B like Q B thing?";
63
TokenStream ts = new AB_Q_Stream
64
(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)));
65
assertTokenStreamContents(ts, expected.split("\\s"));
68
public void testABAAB() throws Exception {
69
final String input = "How now A B brown A cow B like A B thing?";
70
final String expected = "How now A A B brown A cow B like A A B thing?";
71
TokenStream ts = new AB_AAB_Stream
72
(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)));
73
assertTokenStreamContents(ts, expected.split("\\s"));
76
public void testReset() throws Exception {
77
final String input = "How now A B brown A cow B like A B thing?";
78
Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
79
TokenStream ts = new AB_AAB_Stream(tokenizer);
80
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
81
assertTrue(ts.incrementToken());
82
assertEquals("How", term.toString());
83
assertTrue(ts.incrementToken());
84
assertEquals("now", term.toString());
85
assertTrue(ts.incrementToken());
86
assertEquals("A", term.toString());
87
// reset back to input,
88
// if reset() does not work correctly then previous buffered tokens will remain
89
tokenizer.reset(new StringReader(input));
91
assertTrue(ts.incrementToken());
92
assertEquals("How", term.toString());