1
package org.apache.solr.handler.component;
3
* Licensed to the Apache Software Foundation (ASF) under one or more
4
* contributor license agreements. See the NOTICE file distributed with
5
* this work for additional information regarding copyright ownership.
6
* The ASF licenses this file to You under the Apache License, Version 2.0
7
* (the "License"); you may not use this file except in compliance with
8
* the License. You may obtain a copy of the License at
10
* http://www.apache.org/licenses/LICENSE-2.0
12
* Unless required by applicable law or agreed to in writing, software
13
* distributed under the License is distributed on an "AS IS" BASIS,
14
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
* See the License for the specific language governing permissions and
16
* limitations under the License.
19
import org.apache.solr.SolrTestCaseJ4;
20
import org.apache.solr.core.SolrCore;
21
import org.apache.solr.common.params.ModifiableSolrParams;
22
import org.apache.solr.common.params.CommonParams;
23
import org.apache.solr.common.params.TermVectorParams;
24
import org.apache.solr.request.LocalSolrQueryRequest;
25
import org.junit.BeforeClass;
26
import org.junit.Test;
28
import java.util.HashMap;
29
import java.util.ArrayList;
30
import java.util.Arrays;
31
import java.util.List;
37
public class TermVectorComponentTest extends SolrTestCaseJ4 {
39
// One-time fixture for this test class: initializes the core from
// "solrconfig.xml" / "schema.xml", adds documents with ids "0" through "9",
// and commits them.
//
// Documents 0-3 put the same text into each of test_posofftv, test_basictv,
// test_notv, test_postv and test_offtv. Documents 4-9 hold single-word
// near-variants of "blue"; only some of their field assignments are visible
// in this view of the file.
//
// NOTE(review): "reb" in document 1 looks like a typo for "red", but it is
// indexed data; none of the assertions visible in this file depend on it, so
// it is left unchanged.
public static void beforeClass() throws Exception {
40
initCore("solrconfig.xml","schema.xml");
42
assertU(adoc("id", "0",
43
"test_posofftv", "This is a title and another title",
44
"test_basictv", "This is a title and another title",
45
"test_notv", "This is a title and another title",
46
"test_postv", "This is a title and another title",
47
"test_offtv", "This is a title and another title"
49
assertU(adoc("id", "1",
50
"test_posofftv", "The quick reb fox jumped over the lazy brown dogs.",
51
"test_basictv", "The quick reb fox jumped over the lazy brown dogs.",
52
"test_notv", "The quick reb fox jumped over the lazy brown dogs.",
53
"test_postv", "The quick reb fox jumped over the lazy brown dogs.",
54
"test_offtv", "The quick reb fox jumped over the lazy brown dogs."
56
assertU(adoc("id", "2",
57
"test_posofftv", "This is a document",
58
"test_basictv", "This is a document",
59
"test_notv", "This is a document",
60
"test_postv", "This is a document",
61
"test_offtv", "This is a document"
63
assertU(adoc("id", "3",
64
"test_posofftv", "another document",
65
"test_basictv", "another document",
66
"test_notv", "another document",
67
"test_postv", "another document",
68
"test_offtv", "another document"
70
//bunch of docs that are variants on blue
71
assertU(adoc("id", "4",
72
"test_posofftv", "blue",
73
"test_basictv", "blue",
78
assertU(adoc("id", "5",
79
"test_posofftv", "blud",
80
"test_basictv", "blud",
85
assertU(adoc("id", "6",
86
"test_posofftv", "boue",
87
"test_basictv", "boue",
92
assertU(adoc("id", "7",
93
"test_posofftv", "glue",
94
"test_basictv", "glue",
99
assertU(adoc("id", "8",
100
"test_posofftv", "blee",
101
"test_basictv", "blee",
103
"test_postv", "blee",
106
assertU(adoc("id", "9",
107
"test_posofftv", "blah",
108
"test_basictv", "blah",
110
"test_postv", "blah",
114
// Commit so the documents added above are searchable by the tests; the
// commit is expected to validate to null (presumably "no error" - confirm
// against the test harness).
assertNull(h.validateUpdate(commit()));
117
// Value passed as the "qt" request parameter by the assertJQ-based tests in this class.
static String tv = "tvrh";
120
// Queries document id:0 with the term vector component enabled and only
// TermVectorParams.TF switched on, using map-style JSON output
// ("json.nl" = "map"). The expected response carries the unique key '0',
// the unique key field name 'id', and per-field term frequencies
// ('anoth' = 1, 'titl' = 2) for test_basictv, test_offtv, test_posofftv and
// test_postv. test_notv does not appear in the expected term vectors.
// The expected terms are 'anoth' / 'titl' rather than the indexed words;
// presumably the schema's analysis chain stems them - confirm in schema.xml.
public void testBasics() throws Exception {
121
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true")
122
,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
123
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
124
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
125
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
126
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
127
" 'uniqueKeyFieldName':'id'}"
132
// Checks the term vector output options for the term 'anoth' in field
// test_posofftv of document id:0, in three steps:
//   1. TF, DF, OFFSETS, POSITIONS and TF_IDF each enabled explicitly;
//   2. TermVectorParams.ALL enabled on its own, expecting the same output;
//   3. every option passed as "true" or "false" chosen by
//      random.nextBoolean(), with the expected JSON assembled from the
//      matching fragments in the options table.
// NOTE(review): the conditions guarding the expected.append(...) calls in
// the loop are not visible in this view of the file.
public void testOptions() throws Exception {
133
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
134
, TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true")
135
,"/termVectors/doc-0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':1}, 'df':2, 'tf-idf':0.5}"
138
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
139
, TermVectorParams.ALL, "true")
140
,"/termVectors/doc-0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':1}, 'df':2, 'tf-idf':0.5}"
143
// test each combination at random
144
final List<String> list = new ArrayList<String>();
145
list.addAll(Arrays.asList("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"));
146
// Each row pairs a request parameter name with the JSON fragment that is
// expected in the response for that parameter.
String[][] options = new String[][] {
147
{ TermVectorParams.TF, "'tf':1" },
148
{ TermVectorParams.OFFSETS, "'offsets':{'start':20, 'end':27}" },
149
{ TermVectorParams.POSITIONS, "'positions':{'position':1}" },
150
{ TermVectorParams.DF, "'df':2" },
151
{ TermVectorParams.TF_IDF, "'tf-idf':0.5" } };
152
StringBuilder expected = new StringBuilder("/termVectors/doc-0/test_posofftv/anoth=={");
153
boolean first = true;
154
for (int i = 0; i < options.length; i++) {
155
final boolean use = random.nextBoolean();
158
expected.append(", ");
161
expected.append(options[i][1]);
164
// The parameter is always sent, with an explicit "true" or "false" value.
list.add(options[i][0]);
165
list.add(use ? "true" : "false");
168
expected.append("}");
169
assertJQ(req(list.toArray(new String[0])), expected.toString());
173
// Checks per-field overrides of the term vector options for document id:0.
// All options (TF, DF, OFFSETS, POSITIONS, TF_IDF) are enabled globally for
// the five fields listed in TermVectorParams.FIELDS, then selectively
// disabled with "f.<field>.<param>" = "false" parameters.
// Expected results asserted below:
//   - test_basictv: terms are listed with empty maps (TF, DF and TF_IDF are
//     all overridden to false for this field);
//   - test_postv / 'anoth': tf, positions, df and tf-idf;
//   - test_offtv / 'anoth': tf, df and tf-idf (OFFSETS is overridden to
//     false for this field);
//   - warnings: test_notv under 'noTermVectors', test_basictv and test_offtv
//     under 'noPositions', test_basictv and test_postv under 'noOffsets'.
public void testPerField() throws Exception {
174
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
175
,TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true"
176
,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv"
177
,"f.test_posofftv." + TermVectorParams.POSITIONS, "false"
178
,"f.test_offtv." + TermVectorParams.OFFSETS, "false"
179
,"f.test_basictv." + TermVectorParams.DF, "false"
180
,"f.test_basictv." + TermVectorParams.TF, "false"
181
,"f.test_basictv." + TermVectorParams.TF_IDF, "false"
183
,"/termVectors/doc-0/test_basictv=={'anoth':{},'titl':{}}"
184
,"/termVectors/doc-0/test_postv/anoth=={'tf':1, 'positions':{'position':1}, 'df':2, 'tf-idf':0.5}"
185
,"/termVectors/doc-0/test_offtv/anoth=={'tf':1, 'df':2, 'tf-idf':0.5}"
186
,"/termVectors/warnings=={ 'noTermVectors':['test_notv'], 'noPositions':['test_basictv', 'test_offtv'], 'noOffsets':['test_basictv', 'test_postv']}"
191
// TODO: this test is really fragile since it pokes around in Solr's guts and makes many assumptions.
192
// it should be rewritten to use the real distributed interface
194
// Drives TermVectorComponent.distributedProcess directly with a hand-built
// ResponseBuilder instead of going through a real distributed request:
//   - looks up the component registered as "tvComponent" on the test core;
//   - sets the builder to STAGE_GET_FIELDS with four placeholder shard
//     addresses and one ShardDoc per shard in rb.resultIds;
//   - asserts that distributedProcess returns STAGE_DONE and that
//     rb.outgoing holds exactly one ShardRequest per shard;
//   - logs the shards and params of each outgoing request.
// Anything the original test does after the log call is not visible in this
// view of the file.
public void testDistributed() throws Exception {
195
SolrCore core = h.getCore();
196
TermVectorComponent tvComp = (TermVectorComponent) core.getSearchComponent("tvComponent");
197
assertTrue("tvComp is null and it shouldn't be", tvComp != null);
198
ModifiableSolrParams params = new ModifiableSolrParams();
199
ResponseBuilder rb = new ResponseBuilder();
200
rb.stage = ResponseBuilder.STAGE_GET_FIELDS;
201
rb.shards = new String[]{"localhost:0", "localhost:1", "localhost:2", "localhost:3"};//we don't actually call these, since we are going to invoke distributedProcess directly
202
rb.resultIds = new HashMap<Object, ShardDoc>();
203
rb.components = new ArrayList<SearchComponent>();
204
rb.components.add(tvComp);
205
params.add(CommonParams.Q, "id:0");
206
params.add(CommonParams.QT, "tvrh");
207
params.add(TermVectorParams.TF, "true");
208
params.add(TermVectorParams.DF, "true");
209
params.add(TermVectorParams.OFFSETS, "true");
210
params.add(TermVectorParams.POSITIONS, "true");
211
params.add(TermVectorComponent.COMPONENT_NAME, "true");
212
rb.req = new LocalSolrQueryRequest(core, params);
213
rb.outgoing = new ArrayList<ShardRequest>();
214
//one doc per shard, but make sure there are enough docs to go around
215
for (int i = 0; i < rb.shards.length; i++){
216
ShardDoc doc = new ShardDoc();
217
doc.id = i; //must be a valid doc that was indexed.
218
// Scores decrease with the shard index: 1, 0.75, 0.5, 0.25 for four shards.
doc.score = 1 - (i / (float)rb.shards.length);
219
doc.positionInResponse = i;
220
doc.shard = rb.shards[i];
221
doc.orderInShard = 0;
222
rb.resultIds.put(doc.id, doc);
225
int result = tvComp.distributedProcess(rb);
226
assertTrue(result + " does not equal: " + ResponseBuilder.STAGE_DONE, result == ResponseBuilder.STAGE_DONE);
227
//one outgoing per shard
228
assertTrue("rb.outgoing Size: " + rb.outgoing.size() + " is not: " + rb.shards.length, rb.outgoing.size() == rb.shards.length);
229
for (ShardRequest request : rb.outgoing) {
230
ModifiableSolrParams solrParams = request.params;
231
log.info("Shard: " + Arrays.asList(request.shards) + " Params: " + solrParams);
241
* <field name="test_basictv" type="text" termVectors="true"/>
242
<field name="test_notv" type="text" termVectors="false"/>
243
<field name="test_postv" type="text" termVectors="true" termPositions="true"/>
244
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
245
<field name="test_posofftv" type="text" termVectors="true"
246
termPositions="true" termOffsets="true"/>