2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
18
package org.apache.solr.search;
20
import org.apache.lucene.index.Term;
21
import org.apache.lucene.queryParser.ParseException;
22
import org.apache.lucene.queryParser.QueryParser;
23
import org.apache.lucene.search.BooleanClause;
24
import org.apache.lucene.search.BooleanQuery;
25
import org.apache.lucene.search.ConstantScoreQuery;
26
import org.apache.lucene.search.FuzzyQuery;
27
import org.apache.lucene.search.NumericRangeQuery;
28
import org.apache.lucene.search.PrefixQuery;
29
import org.apache.lucene.search.Query;
30
import org.apache.lucene.search.Sort;
31
import org.apache.lucene.search.SortField;
32
import org.apache.lucene.search.TermQuery;
33
import org.apache.lucene.search.TermRangeQuery;
34
import org.apache.lucene.search.WildcardQuery;
35
import org.apache.solr.common.SolrException;
36
import org.apache.solr.common.params.MapSolrParams;
37
import org.apache.solr.common.params.SolrParams;
38
import org.apache.solr.core.SolrCore;
39
import org.apache.solr.request.LocalSolrQueryRequest;
40
import org.apache.solr.request.SolrQueryRequest;
41
import org.apache.solr.schema.FieldType;
42
import org.apache.solr.schema.IndexSchema;
43
import org.apache.solr.schema.SchemaField;
44
import org.apache.solr.search.function.FunctionQuery;
45
import org.apache.solr.search.function.QueryValueSource;
46
import java.io.IOException;
47
import java.util.ArrayList;
48
import java.util.HashMap;
49
import java.util.List;
53
* Collection of static utilities useful for query parsing.
55
* @version $Id: QueryParsing.java 1152657 2011-07-31 22:43:13Z hossman $
57
public class QueryParsing {
58
public static final String OP = "q.op"; // the SolrParam used to override the QueryParser "default operator"
59
public static final String V = "v"; // value of this parameter
60
public static final String F = "f"; // field that a query or command pertains to
61
public static final String TYPE = "type";// type of this query or command
62
public static final String DEFTYPE = "defType"; // default type for any direct subqueries
63
public static final String LOCALPARAM_START = "{!";
64
public static final char LOCALPARAM_END = '}';
65
public static final String DOCID = "_docid_";
66
public static final String SCORE = "score";
68
// true if the value was specified by the "v" param (i.e. v=myval, or v=$param)
69
public static final String VAL_EXPLICIT = "__VAL_EXPLICIT__";
73
* Helper utility for parsing a query using the Lucene QueryParser syntax.
75
* @param qs query expression in standard Lucene syntax
76
* @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
78
public static Query parseQuery(String qs, IndexSchema schema) {
79
return parseQuery(qs, null, schema);
83
* Helper utility for parsing a query using the Lucene QueryParser syntax.
85
* @param qs query expression in standard Lucene syntax
86
* @param defaultField default field used for unqualified search terms in the query expression
87
* @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
89
public static Query parseQuery(String qs, String defaultField, IndexSchema schema) {
91
Query query = schema.getSolrQueryParser(defaultField).parse(qs);
93
if (SolrCore.log.isTraceEnabled()) {
94
SolrCore.log.trace("After QueryParser:" + query);
99
} catch (ParseException e) {
101
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing Lucene query", e);
106
* Helper utility for parsing a query using the Lucene QueryParser syntax.
108
* @param qs query expression in standard Lucene syntax
109
* @param defaultField default field used for unqualified search terms in the query expression
110
* @param params used to determine the default operator, overriding the schema specified operator
111
* @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
113
public static Query parseQuery(String qs, String defaultField, SolrParams params, IndexSchema schema) {
115
SolrQueryParser parser = schema.getSolrQueryParser(defaultField);
116
String opParam = params.get(OP);
117
if (opParam != null) {
118
parser.setDefaultOperator("AND".equals(opParam) ? QueryParser.Operator.AND : QueryParser.Operator.OR);
120
Query query = parser.parse(qs);
122
if (SolrCore.log.isTraceEnabled()) {
123
SolrCore.log.trace("After QueryParser:" + query);
128
} catch (ParseException e) {
130
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Query parsing error: " + e.getMessage(), e);
135
// note to self: something needs to detect infinite recursion when parsing queries
136
static int parseLocalParams(String txt, int start, Map<String, String> target, SolrParams params) throws ParseException {
138
if (!txt.startsWith(LOCALPARAM_START, off)) return start;
139
StrParser p = new StrParser(txt, start, txt.length());
140
p.pos += 2; // skip over "{!"
144
if (p.pos>=txt.length()) {
145
throw new ParseException("Missing '}' parsing local params '" + txt + '"');
149
if (ch == LOCALPARAM_END) {
153
String id = p.getId();
154
if (id.length() == 0) {
155
throw new ParseException("Expected identifier '}' parsing local params '" + txt + '"');
162
// single word... treat {!func} as type=func for easy lookup
166
// saw equals, so read value
169
boolean deref = false;
173
deref = true; // dereference whatever value is read by treating it as a variable name
176
if (ch == '\"' || ch == '\'') {
177
val = p.getQuotedString();
179
// read unquoted literal ended by whitespace or '}'
180
// there is no escaping.
181
int valStart = p.pos;
183
if (p.pos >= p.end) {
184
throw new ParseException("Missing end to unquoted value starting at " + valStart + " str='" + txt + "'");
186
char c = p.val.charAt(p.pos);
187
if (c == LOCALPARAM_END || Character.isWhitespace(c)) {
188
val = p.val.substring(valStart, p.pos);
195
if (deref) { // dereference parameter
196
if (params != null) {
197
val = params.get(val);
201
if (target != null) target.put(id, val);
205
public static String encodeLocalParamVal(String val) {
206
int len = val.length();
208
if (len > 0 && val.charAt(0) != '$') {
210
char ch = val.charAt(i);
211
if (Character.isWhitespace(ch) || ch=='}') break;
215
if (i>=len) return val;
217
// We need to enclose in quotes... but now we need to escape
218
StringBuilder sb = new StringBuilder(val.length() + 4);
220
for (i=0; i<len; i++) {
221
char ch = val.charAt(i);
228
return sb.toString();
234
* "{!prefix f=myfield}yes" returns type="prefix",f="myfield",v="yes"
235
* "{!prefix f=myfield v=$p}" returns type="prefix",f="myfield",v=params.get("p")
237
public static SolrParams getLocalParams(String txt, SolrParams params) throws ParseException {
238
if (txt == null || !txt.startsWith(LOCALPARAM_START)) {
241
Map<String, String> localParams = new HashMap<String, String>();
242
int start = QueryParsing.parseLocalParams(txt, 0, localParams, params);
244
String val = localParams.get(V);
246
val = txt.substring(start);
247
localParams.put(V, val);
249
// localParams.put(VAL_EXPLICIT, "true");
251
return new MapSolrParams(localParams);
256
* Returns null if the sortSpec is the standard sort desc.
259
* The form of the sort specification string currently parsed is:
262
* SortSpec ::= SingleSort [, SingleSort]*
263
* SingleSort ::= <fieldname> SortDirection
264
* SortDirection ::= top | desc | bottom | asc
268
* score desc #normal sort by score (will return null)
269
* weight bottom #sort by weight ascending
270
* weight desc #sort by weight descending
271
* height desc,weight desc #sort by height descending, and use weight descending to break any ties
272
* height desc,weight asc #sort by height descending, using weight ascending as a tiebreaker
275
public static Sort parseSort(String sortSpec, SolrQueryRequest req) {
276
if (sortSpec == null || sortSpec.length() == 0) return null;
277
List<SortField> lst = new ArrayList<SortField>(4);
281
StrParser sp = new StrParser(sortSpec);
282
while (sp.pos < sp.end) {
285
final int start = sp.pos;
287
// short circuit test for a really simple field name
288
String field = sp.getId(null);
289
Exception qParserException = null;
291
if (field == null || !Character.isWhitespace(sp.peekChar())) {
292
// let's try it as a function instead
294
String funcStr = sp.val.substring(start);
296
QParser parser = QParser.getParser(funcStr, FunctionQParserPlugin.NAME, req);
299
if (parser instanceof FunctionQParser) {
300
FunctionQParser fparser = (FunctionQParser)parser;
301
fparser.setParseMultipleSources(false);
302
fparser.setParseToEnd(false);
304
q = fparser.getQuery();
306
if (fparser.localParams != null) {
307
if (fparser.valFollowedParams) {
308
// need to find the end of the function query via the string parser
309
int leftOver = fparser.sp.end - fparser.sp.pos;
310
sp.pos = sp.end - leftOver; // reset our parser to the same amount of leftover
312
// the value was via the "v" param in localParams, so we need to find
313
// the end of the local params themselves to pick up where we left off
314
sp.pos = start + fparser.localParamsEnd;
317
// need to find the end of the function query via the string parser
318
int leftOver = fparser.sp.end - fparser.sp.pos;
319
sp.pos = sp.end - leftOver; // reset our parser to the same amount of leftover
322
// A QParser that's not for function queries.
323
// It must have been specified via local params.
324
q = parser.getQuery();
326
assert parser.getLocalParams() != null;
327
sp.pos = start + parser.localParamsEnd;
330
Boolean top = sp.getSortDirection();
332
// we have a Query and a valid direction
333
if (q instanceof FunctionQuery) {
334
lst.add(((FunctionQuery)q).getValueSource().getSortField(top));
336
lst.add((new QueryValueSource(q, 0.0f)).getSortField(top));
340
} catch (IOException ioe) {
342
} catch (Exception e) {
343
// hang onto this in case the string isn't a full field name either
344
qParserException = e;
348
// if we made it here, we either have a "simple" field name,
349
// or there was a problem parsing the string as a complex func/query
352
// try again, simple rules for a field name with no whitespace
354
field = sp.getSimpleString();
356
Boolean top = sp.getSortDirection();
358
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
359
"Can't determine Sort Order: " + sp);
362
if (SCORE.equals(field)) {
364
lst.add(SortField.FIELD_SCORE);
366
lst.add(new SortField(null, SortField.SCORE, true));
368
} else if (DOCID.equals(field)) {
369
lst.add(new SortField(null, SortField.DOC, top));
371
// try to find the field
372
SchemaField sf = req.getSchema().getFieldOrNull(field);
374
if (null != qParserException) {
375
throw new SolrException
376
(SolrException.ErrorCode.BAD_REQUEST,
377
"sort param could not be parsed as a query, and is not a "+
378
"field that exists in the index: " + field,
381
throw new SolrException
382
(SolrException.ErrorCode.BAD_REQUEST,
383
"sort param field can't be found: " + field);
385
lst.add(sf.getSortField(top));
389
} catch (ParseException e) {
390
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error in sort: " + sortSpec, e);
391
} catch (IOException e) {
392
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error in sort: " + sortSpec, e);
396
// normalize a sort on score desc to null
397
if (lst.size()==1 && lst.get(0) == SortField.FIELD_SCORE) {
401
return new Sort(lst.toArray(new SortField[lst.size()]));
406
///////////////////////////
407
///////////////////////////
408
///////////////////////////
410
static FieldType writeFieldName(String name, IndexSchema schema, Appendable out, int flags) throws IOException {
412
ft = schema.getFieldTypeNoEx(name);
415
out.append("(UNKNOWN FIELD " + name + ')');
421
static void writeFieldVal(String val, FieldType ft, Appendable out, int flags) throws IOException {
424
out.append(ft.indexedToReadable(val));
425
} catch (Exception e) {
426
out.append("EXCEPTION(val=");
436
* @see #toString(Query,IndexSchema)
438
public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
439
boolean writeBoost = true;
441
if (query instanceof TermQuery) {
442
TermQuery q = (TermQuery) query;
443
Term t = q.getTerm();
444
FieldType ft = writeFieldName(t.field(), schema, out, flags);
445
writeFieldVal(t.text(), ft, out, flags);
446
} else if (query instanceof TermRangeQuery) {
447
TermRangeQuery q = (TermRangeQuery) query;
448
String fname = q.getField();
449
FieldType ft = writeFieldName(fname, schema, out, flags);
450
out.append(q.includesLower() ? '[' : '{');
451
String lt = q.getLowerTerm();
452
String ut = q.getUpperTerm();
456
writeFieldVal(lt, ft, out, flags);
464
writeFieldVal(ut, ft, out, flags);
467
out.append(q.includesUpper() ? ']' : '}');
468
} else if (query instanceof NumericRangeQuery) {
469
NumericRangeQuery q = (NumericRangeQuery) query;
470
String fname = q.getField();
471
FieldType ft = writeFieldName(fname, schema, out, flags);
472
out.append(q.includesMin() ? '[' : '{');
473
Number lt = q.getMin();
474
Number ut = q.getMax();
478
out.append(lt.toString());
486
out.append(ut.toString());
489
out.append(q.includesMax() ? ']' : '}');
490
} else if (query instanceof BooleanQuery) {
491
BooleanQuery q = (BooleanQuery) query;
492
boolean needParens = false;
494
if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0) {
500
boolean first = true;
501
for (BooleanClause c : q.clauses()) {
508
if (c.isProhibited()) {
510
} else if (c.isRequired()) {
513
Query subQuery = c.getQuery();
514
boolean wrapQuery = false;
516
// TODO: may need to put parens around other types
517
// of queries too, depending on future syntax.
518
if (subQuery instanceof BooleanQuery) {
526
toString(subQuery, schema, out, flags);
536
if (q.getMinimumNumberShouldMatch() > 0) {
538
out.append(Integer.toString(q.getMinimumNumberShouldMatch()));
541
} else if (query instanceof PrefixQuery) {
542
PrefixQuery q = (PrefixQuery) query;
543
Term prefix = q.getPrefix();
544
FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
545
out.append(prefix.text());
547
} else if (query instanceof ConstantScorePrefixQuery) {
548
ConstantScorePrefixQuery q = (ConstantScorePrefixQuery) query;
549
Term prefix = q.getPrefix();
550
FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
551
out.append(prefix.text());
553
} else if (query instanceof WildcardQuery) {
554
out.append(query.toString());
556
} else if (query instanceof FuzzyQuery) {
557
out.append(query.toString());
559
} else if (query instanceof ConstantScoreQuery) {
560
out.append(query.toString());
563
out.append(query.getClass().getSimpleName()
564
+ '(' + query.toString() + ')');
568
if (writeBoost && query.getBoost() != 1.0f) {
570
out.append(Float.toString(query.getBoost()));
576
* Formats a Query for debugging, using the IndexSchema to make
577
* complex field types readable.
580
* The benefit of using this method instead of calling
581
* <code>Query.toString</code> directly is that it knows about the data
582
* types of each field, so any field which is encoded in a particularly
583
* complex way is still readable. The downside is that it only knows
584
* about built in Query types, and will not be able to format custom
588
public static String toString(Query query, IndexSchema schema) {
590
StringBuilder sb = new StringBuilder();
591
toString(query, schema, sb, 0);
592
return sb.toString();
593
} catch (Exception e) {
594
throw new RuntimeException(e);
599
* Simple class to help with parsing a string
600
* <b>Note: This API is experimental and may change in non backward-compatible ways in the future</b>
602
public static class StrParser {
607
public StrParser(String val) {
608
this(val, 0, val.length());
611
public StrParser(String val, int start, int end) {
618
while (pos < end && Character.isWhitespace(val.charAt(pos))) pos++;
622
return pos < end ? val.charAt(pos) : 0;
625
void skip(int nChars) {
626
pos = Math.max(pos + nChars, end);
629
boolean opt(String s) {
631
int slen = s.length();
632
if (val.regionMatches(pos, s, 0, slen)) {
639
boolean opt(char ch) {
641
if (val.charAt(pos) == ch) {
649
void expect(String s) throws ParseException {
651
int slen = s.length();
652
if (val.regionMatches(pos, s, 0, slen)) {
655
throw new ParseException("Expected '" + s + "' at position " + pos + " in '" + val + "'");
659
float getFloat() throws ParseException {
661
char[] arr = new char[end - pos];
663
for (i = 0; i < arr.length; i++) {
664
char ch = val.charAt(pos);
665
if ((ch >= '0' && ch <= '9')
666
|| ch == '+' || ch == '-'
667
|| ch == '.' || ch == 'e' || ch == 'E'
676
return Float.parseFloat(new String(arr, 0, i));
679
Number getNumber() throws ParseException {
685
char ch = val.charAt(pos);
686
if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-') {
688
} else if (ch == '.' || ch =='e' || ch=='E') {
696
String v = val.substring(start,pos);
697
return flt ? Double.parseDouble(v) : Long.parseLong(v);
700
double getDouble() throws ParseException {
702
char[] arr = new char[end - pos];
704
for (i = 0; i < arr.length; i++) {
705
char ch = val.charAt(pos);
706
if ((ch >= '0' && ch <= '9')
707
|| ch == '+' || ch == '-'
708
|| ch == '.' || ch == 'e' || ch == 'E'
717
return Double.parseDouble(new String(arr, 0, i));
720
int getInt() throws ParseException {
722
char[] arr = new char[end - pos];
724
for (i = 0; i < arr.length; i++) {
725
char ch = val.charAt(pos);
726
if ((ch >= '0' && ch <= '9')
727
|| ch == '+' || ch == '-'
736
return Integer.parseInt(new String(arr, 0, i));
740
String getId() throws ParseException {
741
return getId("Expected identifier");
744
String getId(String errMessage) throws ParseException {
748
if (pos < end && (ch = val.charAt(pos)) != '$' && Character.isJavaIdentifierStart(ch)) {
751
ch = val.charAt(pos);
752
if (!Character.isJavaIdentifierPart(ch) && ch != '.') {
757
return val.substring(id_start, pos);
760
if (errMessage != null) {
761
throw new ParseException(errMessage + " at pos " + pos + " str='" + val + "'");
767
* Skips leading whitespace and returns whatever sequence of non
768
* whitespace it can find (or the empty string)
770
String getSimpleString() {
775
ch = val.charAt(pos);
776
if (Character.isWhitespace(ch)) break;
779
return val.substring(startPos, pos);
783
* Sort direction or null if current position does not indicate a
784
* sort direction. (True is desc, False is asc).
785
* Position is advanced to after the comma (or end) when result is non null
787
Boolean getSortDirection() throws ParseException {
788
final int startPos = pos;
789
final String order = getId(null);
794
if ("desc".equals(order) || "top".equals(order)) {
796
} else if ("asc".equals(order) || "bottom".equals(order)) {
800
// it's not a legal direction if more stuff comes after it
805
} else if (',' == c) {
812
if (null == top) pos = startPos; // no direction, reset
816
// return null if not a string
817
String getQuotedString() throws ParseException {
819
char delim = peekChar();
820
if (!(delim == '\"' || delim == '\'')) {
823
int val_start = ++pos;
824
StringBuilder sb = new StringBuilder(); // needed for escaping
827
throw new ParseException("Missing end quote for string at pos " + (val_start - 1) + " str='" + val + "'");
829
char ch = val.charAt(pos);
832
if (pos >= end) break;
833
ch = val.charAt(pos);
851
if (pos + 4 >= end) {
852
throw new ParseException("bad unicode escape \\uxxxx at pos" + (val_start - 1) + " str='" + val + "'");
854
ch = (char) Integer.parseInt(val.substring(pos + 1, pos + 5), 16);
858
} else if (ch == delim) {
859
pos++; // skip over the quote
866
return sb.toString();
869
// next non-whitespace char
872
return pos < end ? val.charAt(pos) : 0;
877
return pos < end ? val.charAt(pos) : 0;
881
public String toString() {
882
return "'" + val + "'" + ", pos=" + pos;
888
* Builds a list of String which are stringified versions of a list of Queries
890
public static List<String> toString(List<Query> queries, IndexSchema schema) {
891
List<String> out = new ArrayList<String>(queries.size());
892
for (Query q : queries) {
893
out.add(QueryParsing.toString(q, schema));
899
* Parse a function, returning a FunctionQuery
902
* Syntax Examples....
906
* // Numeric fields default to correct type
907
* // (ie: IntFieldSource or FloatFieldSource)
908
* // Others use explicit ord(...) to generate numeric field value
914
* // ReverseOrdFieldSource
917
* // LinearFloatFunction on numeric field value
918
* linear(myfield,1,2)
920
* // MaxFloatFunction of LinearFloatFunction on numeric field value or constant
921
* max(linear(myfield,1,2),100)
923
* // ReciprocalFloatFunction on numeric field value
924
* recip(myfield,1,2,3)
926
* // ReciprocalFloatFunction on ReverseOrdFieldSource
927
* recip(rord(myfield),1,2,3)
929
* // ReciprocalFloatFunction on LinearFloatFunction on ReverseOrdFieldSource
930
* recip(linear(rord(myfield),1,2),3,4,5)
933
public static FunctionQuery parseFunction(String func, IndexSchema schema) throws ParseException {
934
SolrCore core = SolrCore.getSolrCore();
935
return (FunctionQuery) (QParser.getParser(func, "func", new LocalSolrQueryRequest(core, new HashMap())).parse());
936
// return new FunctionQuery(parseValSource(new StrParser(func), schema));