1
# -*- coding: iso-8859-1 -*-
3
MoinMoin - search query parser
5
@copyright: 2005 MoinMoin:FlorianFesti,
6
2005 MoinMoin:NirSoffer,
7
2005 MoinMoin:AlexanderSchremmer,
8
2006-2008 MoinMoin:ThomasWaldmann,
9
2006 MoinMoin:FranzPletz
10
@license: GNU GPL, see COPYING for details
15
from MoinMoin import log
16
logging = log.getLogger(__name__)
18
from MoinMoin import config, wikiutil
19
from MoinMoin.search.queryparser.expressions import AndExpression, OrExpression, TextSearch, TitleSearch, \
20
LinkSearch, CategorySearch, DomainSearch, MimetypeSearch, LanguageSearch
23
class QueryError(ValueError):
    """Error raised for problems when parsing the query.

    Derives from ValueError, so callers that catch ValueError also catch
    this exception.
    """
27
class QueryParser(object):
29
Converts a String into a tree of Query objects.
32
def __init__(self, **kw):
    """
    Store the parser-wide search options.

    Each option defaults to 0 (off) when the keyword is not given.

    @keyword titlesearch: treat all terms as title searches
    @keyword case: do case sensitive search
    @keyword regex: treat all terms as regular expressions
    """
    self.titlesearch = kw.get('titlesearch', 0)
    self.case = kw.get('case', 0)
    self.regex = kw.get('regex', 0)
    # NOTE(review): presumably the marker object for the '-' prefix that
    # the tokenizer puts in front of "-someterm" items -- confirm against
    # wikiutil.ParserPrefix.
    self._M = wikiutil.ParserPrefix('-')
43
def _analyse_items(self, items):
# Turn the parsed query items into an expression tree.  Terms are
# collected in an AndExpression (`terms`); the method calls itself for
# the items handled after an "or" and for bracketed sub-lists.
#
# Raises QueryError with the messages 'Nothing to OR', "Term too short"
# and "Invalid search prefix".
#
# NOTE(review): the bare integers between the statements look like
# original line numbers left behind by extraction, and they skip
# (44 -> 50, 52 -> 55, 62 -> 64, ...), so part of this method is not
# shown here.  `item`, `last`, `regex`, `case`, `m` and `text` are used
# below without a visible binding, and no return statement is visible.
44
terms = AndExpression()
50
# Plain unicode item: either one of the keywords "or" / "and" (compared
# case-insensitively via item.lower()) or an ordinary search term.
if isinstance(item, unicode):
51
if item.lower() == 'or':
52
# the subterms collected so far
sub = terms.subterms()
55
# exact class comparison: an instance of a subclass of OrExpression
# does not take this branch
if last.__class__ == OrExpression:
58
# Note: do NOT reduce "terms" when it has a single subterm only!
59
# Doing that would break "-someterm" searches as we rely on AndExpression
60
# doing a "MatchAll AND_NOT someterm" for that case!
61
# a new OrExpression is built from everything collected so far ...
orexpr = OrExpression(terms)
62
# ... and `terms` restarts as an AndExpression holding only `orexpr`
terms = AndExpression(orexpr)
64
raise QueryError('Nothing to OR')
65
# analyse `items` recursively; the result is attached to `orexpr` below
remaining = self._analyse_items(items)
66
if remaining.__class__ == OrExpression:
67
# NOTE(review): the body of this loop (original lines 68-69) is not
# visible in this listing
for sub in remaining.subterms():
70
orexpr.append(remaining)
72
# NOTE(review): what happens for "and" (original line 73) is not visible
elif item.lower() == 'and':
75
# odd workaround; we should instead ignore this term
76
# and reject expressions that contain nothing after
77
# being parsed rather than rejecting an empty string
80
raise QueryError("Term too short")
84
# an ordinary term becomes a TitleSearch or a TextSearch; the condition
# choosing between the two is not visible here
terms.append(TitleSearch(item, use_re=regex, case=case))
86
terms.append(TextSearch(item, use_re=regex, case=case))
87
# Tuple item: a term that carries one or more search prefixes.
elif isinstance(item, tuple):
89
# the per-item title flag starts from the parser-wide setting
title_search = self.titlesearch
100
raise QueryError("Invalid search prefix")
103
# Each test below is keyword.startswith(m), i.e. `m` matches when it is
# a leading part of the keyword ("t", "ti", ... for "title").  The chain
# is evaluated in this order, so among the visible tests "c" selects
# "case" before "category" and "l" selects "linkto" before "language".
elif "title".startswith(m):
105
elif "regex".startswith(m):
107
elif "case".startswith(m):
109
elif "linkto".startswith(m):
111
elif "language".startswith(m):
113
elif "category".startswith(m):
115
elif "mimetype".startswith(m):
117
elif "domain".startswith(m):
120
raise QueryError("Invalid search prefix")
125
# Exactly one search object is built for the term (the conditions
# selecting among these assignments are not visible).  MimetypeSearch,
# LanguageSearch and DomainSearch are always created with case=False,
# whatever `case` holds.
obj = CategorySearch(text, use_re=regex, case=case)
127
obj = MimetypeSearch(text, use_re=regex, case=False)
129
obj = LanguageSearch(text, use_re=regex, case=False)
131
obj = LinkSearch(text, use_re=regex, case=case)
133
obj = DomainSearch(text, use_re=regex, case=False)
135
obj = TitleSearch(text, use_re=regex, case=case)
137
obj = TextSearch(text, use_re=regex, case=case)
140
# List item: a bracketed sub-expression, analysed recursively and
# appended to `terms` as a single subterm.
elif isinstance(item, list):
141
# strip off the opening parenthesis
142
terms.append(self._analyse_items(item[1:]))
144
# Note: do NOT reduce "terms" when it has a single subterm only!
145
# Doing that would break "-someterm" searches as we rely on AndExpression
146
# doing a "MatchAll AND_NOT someterm" for that case!
149
def parse_query(self, query):
# Entry point: tokenize the query string with
# wikiutil.parse_quoted_separated_ext and hand the resulting items to
# self._analyse_items.
#
# Raises QueryError when the tokenizer raises wikiutil.BracketError.
#
# NOTE(review): the bare integers between the statements look like
# original line numbers left behind by extraction; they skip
# (152 -> 154, 155 -> 160), so the `try:` line and most of the argument
# list of the tokenizer call are not shown.  No return statement is
# visible either -- confirm the analysed tree is returned to the caller.
150
""" transform an string into a tree of Query objects """
151
# a byte string (Python 2 `str`) is decoded with the configured charset
# before tokenizing
if isinstance(query, str):
152
query = query.decode(config.charset)
154
# ':' separates a prefix name from its value
items = wikiutil.parse_quoted_separated_ext(query,
155
name_value_separator=':',
160
# Python 2 `except <type>, <name>` syntax; the bracket error is
# re-raised as QueryError carrying the same message text
except wikiutil.BracketError, err:
161
raise QueryError(str(err))
162
logging.debug("parse_quoted_separated items: %r" % items)
163
# `query` is rebound from the input string to the result of the analysis
query = self._analyse_items(items)
164
logging.debug("analyse_items query: %r" % query)