4
# update-apt-xapian-index - Maintain a system-wide Xapian index of Debian
7
# Copyright (C) 2007 Enrico Zini <enrico@debian.org>
9
# This program is free software; you can redistribute it and/or modify
10
# it under the terms of the GNU General Public License as published by
11
# the Free Software Foundation; either version 2 of the License, or
12
# (at your option) any later version.
14
# This program is distributed in the hope that it will be useful,
15
# but WITHOUT ANY WARRANTY; without even the implied warranty of
16
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
# GNU General Public License for more details.
19
# You should have received a copy of the GNU General Public License
20
# along with this program; if not, write to the Free Software
21
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27
PLUGINDIR = os.environ.get("AXI_PLUGIN_DIR", "/usr/share/apt-xapian-index/plugins")
28
XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index")
29
XAPIANDBSTAMP = XAPIANDBPATH + "/update-timestamp"
30
XAPIANDBLOCK = XAPIANDBPATH + "/update-lock"
31
XAPIANDBVALUES = XAPIANDBPATH + "/values"
32
XAPIANDBDOC = XAPIANDBPATH + "/README"
38
self.is_verbose = False
39
def begin(self, task):
41
print "%s..." % self.task,
44
def progress(self, percent):
45
print "\r%s... %d%%" % (self.task, percent),
49
print "\r%s: done. " % self.task
51
def verbose(self, *args):
52
if not self.is_verbose: return
57
def notice(self, *args):
60
print >>sys.stderr, " ".join(args)
62
def warning(self, *args):
65
print >>sys.stderr, " ".join(args)
67
def error(self, *args):
70
print >>sys.stderr, " ".join(args)
74
def begin(self, task):
76
def progress(self, percent):
80
def verbose(self, *args):
82
def notice(self, *args):
84
def warning(self, *args):
    """Write the arguments, joined by single spaces, as one line on stderr."""
    message = " ".join(args)
    print >>sys.stderr, message
86
def error(self, *args):
    """Write the arguments, joined by single spaces, as one line on stderr."""
    message = " ".join(args)
    print >>sys.stderr, message
90
def __init__(self, file):
91
self.name = os.path.basename(file)
92
self.name = os.path.splitext(self.name)[0]
93
self.filename = os.path.basename(file)
94
self.module = imp.load_source(self.name, file)
95
self.obj = self.module.init()
97
self.info = self.obj.info()
100
# Function definitions
103
# Scan a plugin directory for *.py files, in sorted order, reporting each one
# through progress.verbose().
# NOTE(review): only part of this function is visible here; the notes below
# cover the visible statements only.
def readPlugins(plugindir, progress):
105
Read the addons, in sorted order
108
# NOTE(review): the glob pattern is built from the module-level PLUGINDIR,
# not from the `plugindir` argument, so the argument is not used by this
# loop header -- confirm whether that is intended.
for file in sorted(glob.glob(PLUGINDIR+"/*.py")):
109
progress.verbose("Reading plugin %s." % file)
110
# Skip non-files and hidden files
# NOTE(review): `file` is the full path returned by glob, so file[0] is the
# first character of the whole path rather than of the file name -- verify
# that the hidden-file test does what the comment above says.
111
if not os.path.isfile(file) or file[0] == '.':
114
if addon.obj != None:
118
def buildIndex(pathname, addons, progress):
120
Create a new Xapian index with the content provided by the addons
122
progress.begin("Rebuilding Xapian index")
124
# Create a new Xapian index
125
db = xapian.WritableDatabase(pathname, xapian.DB_CREATE_OR_OVERWRITE)
126
# It seems to be faster without transactions, at the moment
127
#db.begin_transaction(False)
129
# Iterate all Debian packages
132
for idx, pkg in enumerate(cache):
134
if idx % 200 == 0: progress.progress(100*idx/count)
136
document = xapian.Document()
138
# The document data is the package name
139
document.set_data(pkg.name)
141
# Index the package name with a special prefix, to be able to find this
142
# document by exact package name match
143
document.add_term("XP"+pkg.name)
145
# Have all the various plugins index their things
147
addon.obj.index(document, pkg)
149
# Add the document to the index
150
db.add_document(document)
151
#db.commit_transaction();
155
def writeValues(pathname, values, values_desc, progress):
157
Write the value information on the given file
159
progress.verbose("Writing value information to %s." % pathname)
160
out = open(pathname+".tmp", "w")
162
print >>out, textwrap.dedent("""
163
# This file contains the mapping between names of numeric values indexed in the
164
# APT Xapian index and their index
166
# Xapian allows to index numeric values as well as keywords and to use them for
167
# all sorts of useful querying tricks. However, every numeric value needs to
168
# have a unique index, and this configuration file is needed to record which
169
# indices are allocated and to provide a mnemonic name for them.
171
# The format is exactly like /etc/services with name, number and optional
172
# aliases, with the difference that the second column does not use the
173
# "/protocol" part, which would be meaningless here.
176
for name, idx in sorted(values.iteritems(), key=lambda x: x[1]):
177
desc = values_desc[name]
178
print >>out, "%s\t%d\t# %s" % (name, idx, desc)
181
# Atomic update of the values file
182
os.rename(pathname+".tmp", pathname)
184
def writeDoc(pathname, addons, progress):
186
Write the documentation in the given file
188
progress.verbose("Writing documentation to %s." % pathname)
189
# Collect the documentation
193
doc = addon.obj.doc()
197
shortDesc = doc['shortDesc'],
198
fullDoc = doc['fullDoc']))
200
# If a plugin has problem returning documentation, don't worry about it
201
progress.notice("Skipping documentation for plugin", addon.filename)
203
# Write the documentation in pathname
204
out = open(pathname+".tmp", "w")
205
print >>out, textwrap.dedent("""
210
This Xapian database indexes Debian package information. To query the
211
database, open it as ``%s/index``.
213
Data are indexed either as terms or as values. Words found in package
214
descriptions are indexed lowercase, and all other kinds of terms have an
215
uppercase prefix as documented below.
217
Numbers are indexed as Xapian numeric values. A list of the meaning of the
218
numeric values is found in ``%s``.
220
The data sources used for indexing are:
221
""").lstrip() % (XAPIANDBPATH, XAPIANDBVALUES)
224
print >>out, " * %s: %s" % (d['name'], d['shortDesc'])
226
print >>out, textwrap.dedent("""
227
This Xapian index follows the conventions for term prefixes described in
228
``/usr/share/doc/xapian-omega/termprefixes.txt.gz``.
230
Extra Debian data sources can define more extended prefixes (starting with
231
``X``): their meaning is documented below together with the rest of the data
232
source documentation.
234
At the very least, at least one term with the package name and the XP
235
prefix will be present in every document in the database. This allows to
236
quickly lookup a Xapian document by package name.
238
The user data associated to a Xapian document is the package name.
247
print >>out, d['name']
248
print >>out, '='*len(d['name'])
249
print >>out, textwrap.dedent(d['fullDoc'])
253
# Atomic update of the documentation
254
os.rename(pathname+".tmp", pathname)
261
from optparse import OptionParser
266
class Parser(OptionParser):
267
def __init__(self, *args, **kwargs):
268
OptionParser.__init__(self, *args, **kwargs)
270
def error(self, msg):
271
sys.stderr.write("%s: error: %s\n\n" % (self.get_prog_name(), msg))
272
self.print_help(sys.stderr)
275
parser = Parser(usage="usage: %prog [options]",
276
version="%prog "+ VERSION,
277
description="Rebuild the Apt Xapian index")
278
parser.add_option("-q", "--quiet", action="store_true", help="quiet mode: only output fatal errors")
279
parser.add_option("-v", "--verbose", action="store_true", help="verbose mode")
280
parser.add_option("-f", "--force", action="store_true", help="force database rebuild even if it's already up to date")
282
(options, args) = parser.parse_args()
285
# Here starts the main functionality. Import things here so we can do --help
286
# without requiring lots of dependencies (this helps at least help2man at
287
# package build time)
289
# Yes, apt, thanks, I know, the api isn't stable, thank you so very much
290
#warnings.simplefilter('ignore', FutureWarning)
291
warnings.filterwarnings("ignore","apt API not stable yet")
293
warnings.resetwarnings()
294
import os.path, re, imp, glob, xapian, textwrap, shutil, fcntl, errno, itertools, time
297
#if options.quiet: print "quiet"
298
#if options.verbose: print "verbose"
299
#if options.force: print "force"
301
# Instantiate the progress report
303
progress = SilentProgress()
305
progress = Progress()
308
progress.is_verbose = True
310
# Create the database directory if missing
311
if not os.path.isdir(XAPIANDBPATH):
312
progress.verbose("Creating the database directory at %s" % XAPIANDBPATH)
313
os.mkdir(XAPIANDBPATH)
315
# Lock the session so that we prevent concurrent updates.
# The descriptor has to be open for writing: fcntl.lockf() takes its
# exclusive lock through fcntl(F_SETLK) with F_WRLCK, which fails with EBADF
# on a descriptor opened read-only.
lockfd = os.open(XAPIANDBLOCK, os.O_RDWR | os.O_CREAT)
318
fcntl.lockf(lockfd, fcntl.LOCK_EX | fcntl.LOCK_NB)
320
if e.errno == errno.EACCES or e.errno == errno.EAGAIN:
321
progress.notice("Another update is in progress: aborting.")
324
# Read values database
325
#values = readValueDB(VALUESCONF, progress)
327
# Read the addons, in sorted order
328
addons = readPlugins(PLUGINDIR, progress)
330
# Ensure that we have something to do
332
progress.notice("No indexing plugins found in %s" % PLUGINDIR)
335
# Get the most recent modification timestamp of the data sources
336
ds_timestamp = max([x.info['timestamp'] for x in addons])
338
# Get the timestamp of the last database update
340
cur_timestamp = os.path.getmtime(XAPIANDBSTAMP)
343
progress.notice("Reading current timestamp failed: %s. Assuming the index has not been created yet." % e)
346
progress.verbose("Most recent dataset: %s." % time.ctime(ds_timestamp))
347
progress.verbose("Most recent update for: %s." % time.ctime(cur_timestamp))
349
# See if we need an update
350
if ds_timestamp <= cur_timestamp:
352
progress.notice("The index %s is up to date, but rebuilding anyway as requested." % XAPIANDBPATH)
354
progress.notice("The index %s is up to date" % XAPIANDBPATH)
357
# Build the value database
358
progress.verbose("Aggregating value information.")
363
for v in addon.info.get("values", []):
364
values[v['name']] = values_seq
366
values_desc[v['name']] = v['desc']
368
# Tell the addons to do the long initialisation bits
369
progress.verbose("Initializing plugins.")
371
addon.obj.init(dict(values = values), progress)
373
# Create a new Xapian index with the content provided by the addons
374
# Xapian takes care of preventing concurrent updates and removing the old
375
# database if it's left over by a previous crashed update
377
# Find the first unused "index.<n>" name under the database directory,
# counting up from 1, then build the new index at that location
idx = 1
while True:
    tmpidxfname = "index.%d" % idx
    dbdir = XAPIANDBPATH + "/" + tmpidxfname
    if not os.path.exists(dbdir):
        break
    idx += 1
buildIndex(dbdir, addons, progress)
384
# Update the 'index' symlink to point at the new index
385
progress.verbose("Installing the new index.")
387
os.unlink(XAPIANDBPATH + "/index.tmp")
389
# Ignore the error here: we're deleting it 'just in case', because symlink
390
# wouldn't delete it itself
392
os.symlink(tmpidxfname, XAPIANDBPATH + "/index.tmp")
393
os.rename(XAPIANDBPATH + "/index.tmp", XAPIANDBPATH + "/index")
395
# Clean up: delete every index.* directory except the one just created.
# Entries that are not named index.*, or that are not directories, are
# left alone.
for entry in os.listdir(XAPIANDBPATH):
    if (entry.startswith("index.")
            and os.path.isdir(XAPIANDBPATH + "/" + entry)
            and entry != tmpidxfname):
        fullpath = XAPIANDBPATH + "/" + entry
        progress.verbose("Removing old index %s." % fullpath)
        shutil.rmtree(fullpath)
406
# Commit the changes: create the timestamp file if it is missing, then set
# both its access and modification times to the data sources' timestamp
if not os.path.exists(XAPIANDBSTAMP):
    stampfile = open(XAPIANDBSTAMP, "w")
    stampfile.close()
os.utime(XAPIANDBSTAMP, (ds_timestamp, ds_timestamp))

# Regenerate the values file and the README that accompany the index
writeValues(XAPIANDBVALUES, values, values_desc, progress)
writeDoc(XAPIANDBDOC, addons, progress)