1
# This module is part of the Lupy project and is Copyright 2003 Amir
2
# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
3
# it and/or modify it under the terms of version 2.1 of the GNU Lesser
4
# General Public License as published by the Free Software Foundation.
7
from bisect import insort
8
from MoinMoin.support.lupy.search import term, similarity
12
"""A query that matches documents containing a particular
13
sequence of terms. This may be combined with other terms
14
with a L{lupy.search.boolean.BooleanQuery}.
18
"""Constructs an empty phrase query."""
27
"""Adds a term to the end of the query phrase."""
28
if len(self.terms) == 0:
29
self.field = term.field()
31
elif term.field() != self.field:
32
raise Exception, 'All phrase terms must be in the same field: ' + str(term)
34
self.terms.append(term)
38
"""Returns the slop. See setSlop()."""
42
def normalize(self, norm):
# Query-weight normalisation step. The one statement visible here multiplies
# self.weight in place by self.idf; no return statement is shown.
# NOTE(review): `norm` is not referenced by any line visible in this excerpt,
# and the embedded listing numbers jump from 42 to 45, so the statement that
# consumes it is presumably on an omitted line -- confirm against the full file.
45
# factor from document
46
self.weight *= self.idf
49
def scorer(self, reader):
# Chooses a scorer for this phrase query based on how many terms it holds.
# Visible outcomes: a term.TermScorer for a one-term phrase, and an
# ExactPhraseScorer or SloppyPhraseScorer for longer phrases.
# NOTE(review): several listing lines are absent from this excerpt (the
# numbers skip 52-53, 56, 58-59, 61-64, ...). The assignments of `t` and
# `tps`, the body of the zero-term branch and the condition selecting
# exact vs. sloppy scoring are therefore not shown here.
50
# optimize zero-term case
51
if len(self.terms) == 0:
54
# optimize one-term case
55
if len(self.terms) == 1:
57
# Document postings for the single term `t`.
docs = reader.termDocsTerm(t)
60
# A single-term phrase is scored as a plain term, using the norms of
# that term's field and this query's weight.
return term.TermScorer(docs, reader.normsField(t.field()), self.weight)
65
# Position postings for term `t`.
p = reader.termPositionsTerm(t)
67
# I am not sure how this is ever reached?
72
# NOTE(review): this branch reads norms through reader.normsField(),
# whereas the SloppyPhraseScorer branch below calls reader.norms() --
# confirm that both methods exist on the reader.
return ExactPhraseScorer(tps, reader.normsField(self.field),
75
# NOTE(review): no SloppyPhraseScorer definition appears in this excerpt;
# verify it is defined or imported elsewhere.
return SloppyPhraseScorer(tps, reader.norms(self.field),
79
def sumOfSquaredWeights(self, searcher):
    """Compute this query's weight and return its square.

    The idf of the phrase is the sum of similarity.idfTerm(term, searcher)
    over every term in self.terms; the weight is that idf multiplied by
    self.boost. Both self.idf and self.weight are updated in place.

    @param searcher: handed through unchanged to similarity.idfTerm.
    @return: self.weight * self.weight
    """
    # Start the sum from zero. Without the reset, calling this method a
    # second time on the same query object would add the term idfs on top
    # of the previous total and inflate the weight.
    self.idf = 0.0
    for phraseTerm in self.terms:
        self.idf += similarity.idfTerm(phraseTerm, searcher)

    self.weight = self.idf * self.boost
    return self.weight * self.weight
89
def toString(self, f):
# Assembles a textual form of the query piece by piece in the local `buffer`.
# `f` is compared against self.field on the first visible statement.
# NOTE(review): the initialisation of `buffer`, the bodies/conditions of
# some branches and whatever is finally done with `buffer` are on listing
# lines not present in this excerpt (the numbers skip 91-92, 94-96, 99, ...).
90
"""Prints a user-readable version of this query"""
93
if not self.field == f :
97
# Every term except the last is followed by a single space.
for term in self.terms[:-1]:
98
buffer += term.text() + ' '
100
# The last term is followed by one backslash character ('\\' is a single
# backslash). self.terms[-1] raises IndexError on an empty term list and no
# guard for that case is visible here.
# NOTE(review): confirm a backslash, rather than a double quote, is the
# intended phrase delimiter.
buffer += self.terms[-1].text() + '\\'
103
# Slop suffix, written as '~' followed by the slop value.
buffer += '~' + str(self.slop)
105
# Boost suffix, written only when the boost differs from 1.0.
if self.boost != 1.0:
106
buffer += '^' + str(self.boost)
113
def __init__(self, tps, n, w):
# Wraps every entry of `tps` in a PhrasePositions object, passing the entry's
# index within `tps` as the second constructor argument, and keeps the
# resulting list in self.pps.
# NOTE(review): `n` and `w` are not used on the visible line; the listing
# numbers skip 114-116, so they are presumably stored there (score() reads
# self.norms and self.weight) -- confirm against the full file.
117
self.pps = [PhrasePositions(tp, i) for i, tp in enumerate(tps)]
120
def phraseQuery(self):
# Placeholder with no visible body besides its docstring.
# NOTE(review): score() calls self.phraseFreq(), and ExactPhraseScorer
# defines phraseFreq(); confirm whether this stub was meant to carry that
# name instead of phraseQuery.
121
"""Subclass responsibility"""
123
def score(self, end):
# Generator: yields (doc, score) tuples for documents found by stepping the
# PhrasePositions entries in self.pps; the visible outer loop runs while the
# last entry's doc is below `end`.
# NOTE(review): indentation is lost in this excerpt and several listing
# lines are absent (132-133, 135, 137-139, 141, 143, 145), so the nesting of
# the loops below and the body of the `>= end` check cannot be read off here.
124
# find doc w/ all the terms
125
while self.pps[-1].doc < end:
126
# Advance the first entry while its doc is behind the last entry's doc.
while self.pps[0].doc < self.pps[-1].doc:
127
self.pps[0].advance()
128
while self.pps[0].doc < self.pps[-1].doc:
129
self.pps[0].advance()
130
# Rotate: move the first entry to the end of the list.
self.pps.append(self.pps.pop(0))
131
if self.pps[-1].doc >= end:
134
# found doc with all terms
136
# phraseFreq() is not defined by any visible method of the base scorer;
# ExactPhraseScorer supplies one.
freq = self.phraseFreq()
140
# Score starts as tf(freq) scaled by the query weight ...
score = similarity.tf(freq) * self.weight
142
# ... and is then scaled by the norm value looked up for this document.
score *= similarity.normByte(self.norms[self.pps[0].doc])
144
yield (self.pps[0].doc, score)
146
# Step the last entry on to its next document.
self.pps[-1].advance()
151
# PhraseScorer subclass that provides phraseFreq(), the method score() calls
# to obtain a frequency for the current document.
# NOTE(review): large parts of phraseFreq() are absent from this excerpt
# (listing numbers skip 154-159, 164, 167-169, 171-177), including the
# assignments to `init` and `init2`, any counter and the return statement.
class ExactPhraseScorer(PhraseScorer):
153
def phraseFreq(self):
160
# the 'init' bits are to simulate a do-while loop :-/
161
# Runs on the first pass (init == 0) and afterwards for as long as the last
# entry's nextPosition() returns a true value.
while init == 0 or self.pps[-1].nextPosition():
162
while self.pps[0].position < self.pps[-1].position:
163
# scan forward in first
165
while init2 == 0 or self.pps[0].position < self.pps[-1].position:
166
# A false result from nextPosition() is tested here; what happens next is
# on a line not shown.
if not self.pps[0].nextPosition():
170
# Rotate: move the first entry to the end of the list.
self.pps.append(self.pps.pop(0))
178
class PhrasePositions(object):
180
def __init__(self, t, o):
# Keeps an iterator over `t` in self.tpiter; a later loop in this class
# unpacks each item of that iterator as (doc, frq, nextPos).
# NOTE(review): `o` is not used on the visible line. Other methods read
# self.offset and self.tp, which are presumably assigned on the listing
# lines missing here (181-186, 188-190) -- confirm against the full file.
187
self.tpiter = iter(t)
191
def firstPosition(self):
# Copies self.tp.frq into self.count.
# NOTE(review): listing numbers skip from 192 to 197, so any further
# statements of this method are not visible in this excerpt.
192
self.count = self.tp.frq
197
"""Increments to next doc"""
199
for doc, frq, nextPos in self.tpiter:
202
self._nextPos = nextPos
209
self.doc = sys.maxint
213
def nextPosition(self):
# Moves to the next position: takes the next value from self._nextPos and
# stores it, minus self.offset, in self.position.
# `.next()` is the Python 2 iterator method; it raises StopIteration when the
# iterator is exhausted.
# NOTE(review): callers test this method's result as a boolean, but no
# return statement is visible here (listing numbers skip 214-215 and
# 218-224) -- confirm against the full file.
216
# read subsequent positions
217
self.position = self._nextPos.next() - self.offset
225
res = '<pp>d:' + str(self.doc) + ' p:' + str(self.position) + ' o:' + str(self.offset)
228
def __lt__(this, that):
# "Less than" ordering between two objects carrying .doc and .position:
# compare by doc, and fall back to position when the docs are equal.
# `this` takes the place of the conventional `self`.
# NOTE(review): listing number 231 is absent between the two return
# statements; presumably it held the `else:` of the check below.
229
if this.doc == that.doc:
230
# Same document: the smaller position sorts first.
return this.position < that.position
232
# Different documents: the smaller doc sorts first.
return this.doc < that.doc