1
package org.apache.lucene.search.vectorhighlight;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20
import java.text.BreakIterator;
21
import java.util.Locale;
23
import org.apache.lucene.util.LuceneTestCase;
25
public class BreakIteratorBoundaryScannerTest extends LuceneTestCase {
26
static final String TEXT =
27
"Apache Lucene(TM) is a high-performance, full-featured text search engine library written entirely in Java." +
28
"\nIt is a technology suitable for nearly any application that requires\n" +
29
"full-text search, especially cross-platform. \nApache Lucene is an open source project available for free download.";
31
public void testOutOfRange() throws Exception {
32
StringBuilder text = new StringBuilder(TEXT);
33
BreakIterator bi = BreakIterator.getWordInstance(Locale.ENGLISH);
34
BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
36
int start = TEXT.length() + 1;
37
assertEquals(start, scanner.findStartOffset(text, start));
38
assertEquals(start, scanner.findEndOffset(text, start));
40
assertEquals(start, scanner.findStartOffset(text, start));
42
assertEquals(start, scanner.findEndOffset(text, start));
45
public void testWordBoundary() throws Exception {
46
StringBuilder text = new StringBuilder(TEXT);
47
BreakIterator bi = BreakIterator.getWordInstance(Locale.ENGLISH);
48
BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
50
int start = TEXT.indexOf("formance");
51
int expected = TEXT.indexOf("high-performance");
52
testFindStartOffset(text, start, expected, scanner);
54
expected = TEXT.indexOf(", full");
55
testFindEndOffset(text, start, expected, scanner);
58
public void testSentenceBoundary() throws Exception {
59
StringBuilder text = new StringBuilder(TEXT);
60
BreakIterator bi = BreakIterator.getSentenceInstance();
61
BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
63
int start = TEXT.indexOf("any application");
64
int expected = TEXT.indexOf("It is a");
65
testFindStartOffset(text, start, expected, scanner);
67
expected = TEXT.indexOf("Apache Lucene is an open source");
68
testFindEndOffset(text, start, expected, scanner);
71
public void testLineBoundary() throws Exception {
72
StringBuilder text = new StringBuilder(TEXT);
73
BreakIterator bi = BreakIterator.getLineInstance();
74
BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
76
int start = TEXT.indexOf("any application");
77
int expected = TEXT.indexOf("nearly");
78
testFindStartOffset(text, start, expected, scanner);
80
expected = TEXT.indexOf("application that requires");
81
testFindEndOffset(text, start, expected, scanner);
84
private void testFindStartOffset(StringBuilder text, int start, int expected, BoundaryScanner scanner) throws Exception {
85
assertEquals(expected, scanner.findStartOffset(text, start));
88
private void testFindEndOffset(StringBuilder text, int start, int expected, BoundaryScanner scanner) throws Exception {
89
assertEquals(expected, scanner.findEndOffset(text, start));