~ubuntu-branches/ubuntu/natty/moin/natty-updates

« back to all changes in this revision

Viewing changes to MoinMoin/search/queryparser/__init__.py

  • Committer: Bazaar Package Importer
  • Author(s): Jonas Smedegaard
  • Date: 2008-06-22 21:17:13 UTC
  • mto: This revision was merged to the branch mainline in revision 18.
  • Revision ID: james.westby@ubuntu.com-20080622211713-inlv5k4eifxckelr
Import upstream version 1.7.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# -*- coding: iso-8859-1 -*-
2
 
"""
3
 
    MoinMoin - search query parser
4
 
 
5
 
    @copyright: 2005 MoinMoin:FlorianFesti,
6
 
                2005 MoinMoin:NirSoffer,
7
 
                2005 MoinMoin:AlexanderSchremmer,
8
 
                2006-2008 MoinMoin:ThomasWaldmann,
9
 
                2006 MoinMoin:FranzPletz
10
 
    @license: GNU GPL, see COPYING for details
11
 
"""
12
 
 
13
 
import re
14
 
 
15
 
from MoinMoin import log
16
 
logging = log.getLogger(__name__)
17
 
 
18
 
from MoinMoin import config, wikiutil
19
 
from MoinMoin.search.queryparser.expressions import AndExpression, OrExpression, TextSearch, TitleSearch, \
20
 
    LinkSearch, CategorySearch, DomainSearch, MimetypeSearch, LanguageSearch
21
 
 
22
 
 
23
 
class QueryError(ValueError):
    """ error raised for problems when parsing the query

    Subclasses ValueError, so code that already catches ValueError also
    catches query parsing problems.
    """
25
 
 
26
 
 
27
 
class QueryParser(object):
    """
    Converts a String into a tree of Query objects.

    parse_query() tokenizes the query string with
    wikiutil.parse_quoted_separated_ext() and hands the resulting items to
    _analyse_items(), which builds the AndExpression / OrExpression tree.
    """

    # Search prefix keywords, in the order they are tried. A prefix given in
    # the query selects the FIRST keyword that starts with it, so the order
    # decides how abbreviations are resolved (e.g. "c" means "case", not
    # "category"; "l" means "linkto", not "language").
    _PREFIX_KEYWORDS = ("title", "regex", "case", "linkto", "language",
                        "category", "mimetype", "domain")

    def __init__(self, **kw):
        """
        @keyword titlesearch: treat all terms as title searches
        @keyword case: do case sensitive search
        @keyword regex: treat all terms as regular expressions
        """
        self.titlesearch = kw.get('titlesearch', 0)
        self.case = kw.get('case', 0)
        self.regex = kw.get('regex', 0)
        # prefix object that marks a negated term ("-term")
        self._M = wikiutil.ParserPrefix('-')

    def _prefix_keyword(self, prefix):
        """ resolve a (possibly abbreviated) search prefix

        @param prefix: prefix string as given in the query, e.g. u"t" or u"title"
        @rtype: str or None
        @return: the first keyword of _PREFIX_KEYWORDS that starts with
                 prefix, None if there is no such keyword
        """
        # NOTE(review): an empty prefix string matches every keyword and thus
        # resolves to "title" - confirm whether the tokenizer can emit one.
        for keyword in self._PREFIX_KEYWORDS:
            if keyword.startswith(prefix):
                return keyword
        return None

    def _term_from_tuple(self, item):
        """ build a single search term from a prefixed item

        @param item: tuple of zero or more prefixes followed by the search
                     text as last element
        @return: one of the *Search expression objects, with its "negated"
                 attribute set
        @raise QueryError: if a prefix is None or matches no known keyword
        """
        M = self._M
        negate = item[0] == M
        # the parser-wide settings are the defaults, prefixes can only
        # switch additional flags on
        flags = {
            "title": self.titlesearch,
            "regex": self.regex,
            "case": self.case,
            "linkto": False,
            "language": False,
            "category": False,
            "mimetype": False,
            "domain": False,
        }

        # consume all prefixes, the last element is the search text
        while len(item) > 1:
            m = item[0]
            if m is None:
                raise QueryError("Invalid search prefix")
            elif m == M:
                negate = True
            else:
                keyword = self._prefix_keyword(m)
                if keyword is None:
                    raise QueryError("Invalid search prefix")
                flags[keyword] = True
            item = item[1:]

        text = item[0]
        regex = flags["regex"]
        case = flags["case"]
        # If several search kinds were requested, the first matching branch
        # below wins. Mimetype, language and domain searches are always
        # created with case=False, regardless of the case flag.
        if flags["category"]:
            obj = CategorySearch(text, use_re=regex, case=case)
        elif flags["mimetype"]:
            obj = MimetypeSearch(text, use_re=regex, case=False)
        elif flags["language"]:
            obj = LanguageSearch(text, use_re=regex, case=False)
        elif flags["linkto"]:
            obj = LinkSearch(text, use_re=regex, case=case)
        elif flags["domain"]:
            obj = DomainSearch(text, use_re=regex, case=False)
        elif flags["title"]:
            obj = TitleSearch(text, use_re=regex, case=case)
        else:
            obj = TextSearch(text, use_re=regex, case=case)
        obj.negated = negate
        return obj

    def _analyse_items(self, items):
        """ turn a sequence of tokenized items into an expression tree

        @param items: sequence of items; unicode objects are plain terms or
                      the operators "or" / "and" (any case), tuples are
                      prefixed terms, lists are parenthesized sub-queries
                      (their first element is skipped)
        @return: an AndExpression containing the parsed terms
        @raise QueryError: for an "or" without a preceding term, for an
                           empty term and for an invalid search prefix
        """
        terms = AndExpression()
        while items:
            item = items[0]
            items = items[1:]

            if isinstance(item, unicode):
                operator = item.lower()
                if operator == 'or':
                    sub = terms.subterms()
                    if len(sub) < 1:
                        raise QueryError('Nothing to OR')
                    last = sub[-1]
                    if last.__class__ == OrExpression:
                        orexpr = last
                    else:
                        # Note: do NOT reduce "terms" when it has a single subterm only!
                        # Doing that would break "-someterm" searches as we rely on AndExpression
                        # doing a "MatchAll AND_NOT someterm" for that case!
                        orexpr = OrExpression(terms)
                    terms = AndExpression(orexpr)
                    # everything after the "or" becomes the right hand side,
                    # so the remaining items are consumed by the recursion
                    remaining = self._analyse_items(items)
                    if remaining.__class__ == OrExpression:
                        for sub in remaining.subterms():
                            orexpr.append(sub)
                    else:
                        orexpr.append(remaining)
                    break
                elif operator == 'and':
                    # AND is the default way of combining terms
                    pass
                else:
                    # odd workaround; we should instead ignore this term
                    # and reject expressions that contain nothing after
                    # being parsed rather than rejecting an empty string
                    # before parsing...
                    if not item:
                        raise QueryError("Term too short")
                    if self.titlesearch:
                        terms.append(TitleSearch(item, use_re=self.regex, case=self.case))
                    else:
                        terms.append(TextSearch(item, use_re=self.regex, case=self.case))
            elif isinstance(item, tuple):
                terms.append(self._term_from_tuple(item))
            elif isinstance(item, list):
                # strip off the opening parenthesis
                terms.append(self._analyse_items(item[1:]))

        # Note: do NOT reduce "terms" when it has a single subterm only!
        # Doing that would break "-someterm" searches as we rely on AndExpression
        # doing a "MatchAll AND_NOT someterm" for that case!
        return terms

    def parse_query(self, query):
        """ transform a string into a tree of Query objects

        @param query: the query string; a str is decoded using
                      config.charset first
        @return: the expression tree built by _analyse_items()
        @raise QueryError: if the tokenizer raises wikiutil.BracketError or
                           if _analyse_items() rejects the items
        """
        if isinstance(query, str):
            query = query.decode(config.charset)
        try:
            items = wikiutil.parse_quoted_separated_ext(query,
                                                        name_value_separator=':',
                                                        prefixes='-',
                                                        multikey=True,
                                                        brackets=('()', ),
                                                        quotes='\'"')
        except wikiutil.BracketError:
            # sys.exc_info() is used instead of "except X, err" / "except X
            # as err" because it is valid syntax on every Python version
            import sys
            err = sys.exc_info()[1]
            raise QueryError(str(err))
        # the values are wrapped in a 1-tuple so that a tuple value can not
        # be mistaken for multiple format arguments
        logging.debug("parse_quoted_separated items: %r" % (items, ))
        query = self._analyse_items(items)
        logging.debug("analyse_items query: %r" % (query, ))
        return query