1
# This module is part of the Lupy project and is Copyright 2003 Amir
2
# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
3
# it and/or modify it under the terms of version 2.1 of the GNU Lesser
4
# General Public License as published by the Free Software Foundation.
11
"""A Query that matches documents matching boolean combinations of
12
other queries, typically L{lupy.search.term.TermQuery}s or L{lupy.search.phrase.PhraseQuery}s."""
16
"""Constructs an empty boolean query."""
21
def addClause(self, clause):
    """Adds a BooleanClause to this query.

    @param clause: the clause object to append to C{self.clauses}; it is
        stored as given.
    """
    self.clauses.append(clause)
26
def add(self, query, required, prohibited):
    """Adds a clause to a boolean query.  Clauses may be:
    C{required} which means that documents which I{do not}
    match this sub-query will I{not} match the boolean query;
    C{prohibited} which means that documents which I{do}
    match this sub-query will I{not} match the boolean query; or
    neither, in which case matched documents are neither prohibited from
    nor required to match the sub-query.

    It is an error to specify a clause as both C{required} and
    C{prohibited}.

    @param query: the sub-query wrapped by the new clause.
    @param required: flag stored on the new clause as its required marker.
    @param prohibited: flag stored on the new clause as its prohibited
        marker.
    """
    # NOTE(review): the end of the docstring and the trailing constructor
    # arguments were missing from this copy of the file; they are restored
    # here from this method's parameter list and from the
    # BooleanClause.__init__(q, r, p) signature -- confirm against upstream.
    self.clauses.append(BooleanClause(query, required, prohibited))
43
def normalize(self, norm):
# Forwards the normalization factor `norm` to the sub-query of the clauses
# in self.clauses by calling c.query.normalize(norm).
# NOTE(review): indentation was stripped from this listing and the embedded
# numbering skips 45, so a line between the loop header and the call below
# (for example a condition selecting which clauses are normalized) may be
# missing -- confirm against the original file before relying on this.
44
for c in self.clauses:
46
c.query.normalize(norm)
48
def scorer(self, reader):
# Builds the scorer for this query against `reader`.
# With exactly one clause the visible code short-circuits to that clause's
# own scorer; otherwise it creates a BooleanScorer and registers every
# sub-query scorer that is not None together with the clause's
# required/prohibited flags.
# NOTE(review): the embedded numbering skips 52-53, 55, 57 and everything
# after 61. In the visible lines `c` is not bound before the early return,
# and no final return of `result` appears -- those lines are presumably among
# the missing ones; confirm against the original file.
49
# optimize the single-clause case (the test below is len == 1, not 0)
50
if len(self.clauses) == 1:
51
# just return the sub-query's own scorer
54
return c.query.scorer(reader)
56
result = BooleanScorer()
58
for c in self.clauses:
59
subScorer = c.query.scorer(reader)
60
# a sub-query may return None instead of a scorer; only real scorers are added
if subScorer is not None:
61
result.add(subScorer, c.required, c.prohibited)
68
def sumOfSquaredWeights(self, searcher):
# Calls sumOfSquaredWeights(searcher) on the sub-query of each clause and
# accumulates results into the local `sum` (which shadows the builtin).
# NOTE(review): the embedded numbering skips 69-70, 72, 75 and 78-80. The
# initialization of `sum`, the conditions that choose between the two calls
# below, and the return statement are not visible here -- confirm against
# the original file.
71
for c in self.clauses:
73
# sum sub-query weights
74
sum += c.query.sumOfSquaredWeights(searcher)
76
# allow complex queries to initialize themselves; the result is discarded
77
c.query.sumOfSquaredWeights(searcher)
81
def toString(self, field):
82
"""Prints a user-readable version of this query"""
# The visible code walks self.clauses and appends each sub-query's
# toString(field) text to `buffer`, with a separate path for sub-queries
# that are themselves BooleanQuery instances.
# NOTE(review): the embedded numbering skips 83-85, 87-92, 95, 97-98 and
# 100-102. The bindings of `buffer` and `subQuery`, whatever is emitted around
# nested boolean queries, the else branch header and the final result are
# not visible here -- confirm against the original file.
86
for c in self.clauses:
93
if isinstance(subQuery, BooleanQuery):
94
# wrap sub-bools in parens
96
buffer += c.query.toString(field)
99
# presumably the non-boolean branch: same text, without the wrapping
buffer += c.query.toString(field)
103
class BooleanClause(object):
    """A clause in a BooleanQuery"""

    def __init__(self, q, r, p):
        """Stores a sub-query together with its two flags.

        @param q: the sub-query, exposed as C{self.query}.
        @param r: the required flag, exposed as C{self.required}.
        @param p: the prohibited flag, exposed as C{self.prohibited}.
        """
        # NOTE(review): the constructor body was missing from this copy of
        # the file. The attribute names are restored from how this module
        # reads clauses (c.query, c.required, c.prohibited) -- confirm
        # against upstream.
        self.query = q
        self.required = r
        self.prohibited = p
114
self.coordFactors = None
117
self.prohibitedMask = 0
118
self.requiredMask = 0
124
def add(self, scorer, required, prohibited):
# Registers a sub-scorer with this boolean scorer: wraps it in a SubScorer
# together with its flags and a bit mask, and appends it to self.scorers.
# Clauses that are required or prohibited consume one bit of self.nextMask;
# the mask is OR-ed into self.prohibitedMask / self.requiredMask.
# Raises Exception when self.nextMask is 0 at the time a required or
# prohibited scorer is added.
# NOTE(review): the embedded numbering skips 125, 129, 131-138, 141 and 144.
# The assignment of `mask` and the conditions guarding the two mask updates
# are not visible here -- confirm against the original file.
126
if required or prohibited:
127
# NOTE(review): since PEP 237 (Python 2.4+) a left shift promotes to long
# instead of wrapping, so shifting alone never brings nextMask back to 0;
# confirm whether this 32-clause guard can still fire on the target
# interpreter.
if self.nextMask == 0:
128
raise Exception, 'More than 32 required/prohibited clauses in a query.'
130
# move on to the next free bit for the following required/prohibited scorer
self.nextMask = self.nextMask << 1
139
# Update prohibited mask
140
self.prohibitedMask |= mask
142
# Update required mask
143
self.requiredMask |= mask
145
self.scorers.append(SubScorer(scorer, required, prohibited, mask))
148
def computeCoordFactors(self):
    """Rebuilds C{self.coordFactors} from C{self.maxCoord}.

    Entry C{i} of the resulting list is
    C{similarity.coord(i, self.maxCoord)} for every C{i} in
    C{range(self.maxCoord)}; the list is empty when C{self.maxCoord} is 0.
    """
    # NOTE(review): the table holds indices 0..maxCoord-1, while
    # collectHits() indexes it with a bucket's coord (which collect() starts
    # at 1) and has to catch IndexError. If coord can reach maxCoord this
    # range is one entry short -- confirm before changing, and mind
    # similarity.coord(0, 0) when maxCoord is 0.
    maxCoord = self.maxCoord
    self.coordFactors = [similarity.coord(i, maxCoord)
                         for i in range(maxCoord)]
154
def collect(self, doc, score, mask):
# Records one (doc, score) hit from a sub-scorer whose bit mask is `mask`.
# Buckets live in self.table keyed by doc; a bucket that is (re)started for
# a document is also appended to self.validList.
# NOTE(review): the embedded numbering skips 156, 160-162 and 165-172. The
# conditions deciding when a bucket is created or reset, and the path that
# merges a hit into an existing bucket, are not visible here -- confirm
# against the original file.
155
bucket = self.table.get(doc, None)
157
# bucket layout: [doc, score, bits, coord]
158
bucket = [-1, 0, 0, 0]
159
self.table[doc] = bucket
163
# overwrite in place so the list object stored in self.table is the one updated;
# coord starts at 1 for the first hit
bucket[:] = [doc, score, mask, 1]
164
self.validList.append(bucket)
173
#print doc, score, mask, bucket
176
def score(self, maxDoc):
    """Runs every registered sub-scorer and returns the combined hits.

    Each C{(doc, score)} pair produced by a sub-scorer's C{score(maxDoc)}
    is passed to C{self.collect} together with that sub-scorer's mask.

    @param maxDoc: passed unchanged to each sub-scorer's C{score()}.
    @return: whatever C{self.collectHits()} returns.
    """
    # Build the coordination-factor table lazily, on first use.
    if self.coordFactors is None:
        self.computeCoordFactors()
    for sub in self.scorers:
        for doc, docScore in sub.scorer.score(maxDoc):
            self.collect(doc, docScore, sub.mask)
    return self.collectHits()
186
def collectHits(self):
# Walks the buckets gathered in self.validList, keeps those whose bits
# satisfy the prohibited/required masks, scales their score by the
# coordination factor for their coord count, and finally empties
# self.validList in place.
# NOTE(review): the embedded numbering skips 191, 193, 196-197 and 199-200.
# The `try:` matching the except below, the body of that handler, and how
# accepted hits are handed back to the caller are not visible here --
# confirm against the original file.
187
for bucket in self.validList:
188
doc, score, bits, coord = bucket
189
# accept only if no prohibited bit is set and every required bit is set
if (bits & self.prohibitedMask) == 0 and (bits & self.requiredMask) == self.requiredMask:
190
# if prohibited and required check out
192
#print "CollectHits:", doc, score, self.coordFactors, coord
194
scorecf = score * self.coordFactors[coord]
195
# NOTE(review): this handler papers over coord falling outside
# self.coordFactors; the table is built with range(self.maxCoord), so a
# coord equal to maxCoord would land here -- worth confirming and fixing at
# the source.
except IndexError, err: # XXX ugly way to avoid it crashing 8(
198
# clear in place so the same list object is reused
del self.validList[:]
201
class SubScorer(object):
# Record pairing a sub-scorer with its required/prohibited flags and its
# bit mask.
# NOTE(review): the embedded numbering skips 204 and the listing ends at
# 206. Other code in this file reads `.scorer` and `.mask` from these
# objects, so the assignments of those two attributes are presumably among
# the missing lines -- confirm against the original file.
203
def __init__(self, scorer, required, prohibited, mask):
205
self.required = required
206
self.prohibited = prohibited