1
# This module is part of the Lupy project and is Copyright 2003 Amir
2
# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
3
# it and/or modify it under the terms of version 2.1 of the GNU Lesser
4
# General Public License as published by the Free Software Foundation.
8
def __init__(self, fld, txt, intern=False):
11
def __cmp__(self, other):
12
"""Compares two terms, returning an integer which is less than zero iff this
13
term belongs before the argument, equal to zero iff this term is equal to the
14
argument, and greater than zero iff this term belongs after the argument.
16
The ordering of terms is first by field, then by text."""
18
if self.fld == other.fld:
20
return cmp(self.txt, other.txt)
22
return cmp(self.fld, other.fld)
30
def readObject(self, inp):
31
inp.defaultReadObject()
33
def set(self, fld, txt):
36
self._hash = hash(fld + txt)
42
return 'Term<'+self.fld.encode('utf8')+':'+self.txt.encode('utf8')+'>'
44
class TermInfo(object):
51
def set(self, df, fp, pp):
57
self.docFreq = ti.docFreq
58
self.freqPointer = ti.freqPointer
59
self.proxPointer = ti.proxPointer
62
return '<TermInfo:d:' + str(self.docFreq)+ ' f:' + str(self.freqPointer) +\
63
' p:' + str(self.proxPointer) + '>'
66
class TermInfosWriter(object):
70
def __init__(self, d, seg, fis, isIndex = False):
72
self.initialize(d, seg, fis, isIndex)
75
self.lastIndexPointer = 0
76
self.lastTerm = Term('','')
77
self.lastTi = TermInfo()
80
self.other = TermInfosWriter(d, seg, fis, True)
81
self.other.other = self
84
def initialize(self, d, seg, fis, isi):
92
self.output=d.createFile(seg + ext)
93
# leave space for size
94
self.output.writeInt(0)
97
def stringDifference(self, s1, s2):
98
prefixLength = min(len(s1), len(s2))
99
for i in range(prefixLength):
106
def add(self, term, ti):
107
if not self.isIndex and term <= self.lastTerm:
108
raise Exception, "term out of order: " + str(term) + str(self.lastTerm)
109
if ti.freqPointer < self.lastTi.freqPointer:
110
raise Exception, "freqPointer out of order"
111
if ti.proxPointer < self.lastTi.proxPointer:
112
raise Exception, "proxPointer out of order"
114
if (not self.isIndex and self.size % self.INDEX_INTERVAL == 0):
116
self.other.add(self.lastTerm, self.lastTi)
121
self.output.writeVInt(ti.docFreq)
123
self.output.writeVLong(ti.freqPointer - self.lastTi.freqPointer)
124
self.output.writeVLong(ti.proxPointer - self.lastTi.proxPointer)
127
self.output.writeVLong(self.other.output.getFilePointer() - self.lastIndexPointer)
128
self.lastIndexPointer = self.other.output.getFilePointer()
130
self.lastTi.setTo(ti)
136
self.output.writeInt(self.size)
139
if self.isIndex is not True:
143
def writeTerm(self, term):
144
a, b = self.lastTerm.text(), term.text()
145
start = self.stringDifference(a, b)
146
delta = term.text()[start:]
147
# write shared prefix length
148
self.output.writeVInt(start)
150
self.output.writeString(delta)
152
i = self.fieldInfos.fieldNumber(term.field())
153
self.output.writeVInt(i)