2
2
# debtags.py -- Access and manipulate Debtags information
4
# Copyright (C) 2006 Enrico Zini <enrico@enricozini.org>
6
# This library is free software; you can redistribute it and/or
7
# modify it under the terms of the GNU Lesser General Public
8
# License as published by the Free Software Foundation; either
9
# version 2.1 of the License, or (at your option) any later version.
11
# This library is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
3
# Copyright (C) 2006-2007 Enrico Zini <enrico@enricozini.org>
5
# This program is free software: you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation, either version 3 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful, but
11
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
# Lesser General Public License for more details.
16
# You should have received a copy of the GNU Lesser General Public
17
# License along with this library; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13
# General Public License for more details.
21
# TODO: install python-epydoc and try to autogenerate documentation from that
15
# You should have received a copy of the GNU General Public License
16
# along with this program. If not, see <http://www.gnu.org/licenses/>.
18
import math, re, cPickle
21
lre = re.compile(r"^(.+?)(?::?\s*|:\s+(.+?)\s*)$")
23
# Is there a way to remove the last character of a line that does not
24
# make a copy of the entire line?
26
pkgs = set(m.group(1).split(', '))
28
tags = set(m.group(2).split(', '))
25
33
def readTagDatabase(input):
26
34
"Read the tag database, returning a pkg->tags dictionary"
29
# Is there a way to remove the last character of a line that does not
30
# make a copy of the entire line?
31
line = line.rstrip("\n")
32
pkgs, tags = line.split(": ")
36
for pkgs, tags in parseTags(input):
33
37
# Create the tag set using the native set
34
tags = set(tags.split(", "))
35
for p in pkgs.split(", "):
36
39
db[p] = tags.copy()
39
42
def readTagDatabaseReversed(input):
40
43
"Read the tag database, returning a tag->pkgs dictionary"
43
# Is there a way to remove the last character of a line that does not
44
# make a copy of the entire line?
45
line = line.rstrip("\n")
46
pkgs, tags = line.split(": ")
45
for pkgs, tags in parseTags(input):
47
46
# Create the tag set using the native set
48
pkgs = set(pkgs.split(", "))
49
for tag in tags.split(", "):
50
48
if db.has_key(tag):
57
55
"Read the tag database, returning a pkg->tags and a tag->pkgs dictionary"
61
# Is there a way to remove the last character of a line that does not
62
# make a copy of the entire line?
63
line = line.rstrip("\n")
64
pkgs, tags = line.split(": ")
58
for pkgs, tags in parseTags(input):
65
59
# Create the tag set using the native set
66
pkgs = set(pkgs.split(", "))
67
60
if tagFilter == None:
68
tags = set(tags.split(", "))
70
tags = set(filter(tagFilter, tags.split(', ')))
63
tags = set(filter(tagFilter, tags))
72
65
db[pkg] = tags.copy()
150
143
self.db, self.rdb = readTagDatabaseBothWays(input, tagFilter)
145
def qwrite(self, file):
    """
    Quickly write the data to a pickled file.

    The package->tags table (self.db) is pickled first and the
    tag->packages table (self.rdb) second, both to the same open file
    object `file`.  qread() loads them back in that same order.
    """
    for table in (self.db, self.rdb):
        cPickle.dump(table, file)
150
def qread(self, file):
    """
    Quickly read the data from a pickled file.

    Two pickled objects are loaded from the open file object `file`:
    the first is stored as self.db and the second as self.rdb, which is
    the order in which qwrite() dumps them.

    NOTE(review): unpickling can execute arbitrary code, so `file`
    should only ever come from a trusted source.
    """
    for attribute in ("db", "rdb"):
        setattr(self, attribute, cPickle.load(file))
152
155
def insert(self, pkg, tags):
153
156
self.db[pkg] = tags.copy()
170
173
res.rdb = self.db
176
def facetCollection(self):
178
Return a copy of this collection, but replaces the tag names
179
with only their facets.
182
tofacet = re.compile(r"^([^:]+).+")
183
for pkg, tags in self.iterPackagesTags():
184
ftags = set([tofacet.sub(r"\1", t) for t in tags])
185
fcoll.insert(pkg, ftags)
190
Return a copy of this collection, with the tagsets copied as
194
res.db = self.db.copy()
195
res.rdb = self.rdb.copy()
173
198
def reverseCopy(self):
175
200
Return the reverse collection, with a copy of the tagsets of
179
204
res.db = self.rdb.copy()
232
259
res.rdb = reverse(db)
262
def filterPackagesTags(self, packageTagFilter):
264
Return a collection with only those packages that match a
265
filter, sharing tagsets with this one. The filter will match
270
for pkg, tags in filter(packageTagFilter, self.db.iteritems()):
271
db[pkg] = self.db[pkg]
273
res.rdb = reverse(db)
276
def filterPackagesTagsCopy(self, packageTagFilter):
278
Return a collection with only those packages that match a
279
filter, with a copy of the tagsets of this one. The filter
280
will match on (package, tags).
284
for pkg, tags in filter(packageTagFilter, self.db.iteritems()):
285
db[pkg] = self.db[pkg].copy()
287
res.rdb = reverse(db)
290
def filterTags(self, tagFilter):
292
Return a collection with only those tags that match a
293
filter, sharing package sets with this one. The filter will match
298
for tag in filter(tagFilter, self.rdb.iterkeys()):
299
rdb[tag] = self.rdb[tag]
301
res.db = reverse(rdb)
304
def filterTagsCopy(self, tagFilter):
306
Return a collection with only those tags that match a
307
filter, with a copy of the package sets of this one. The
308
filter will match on the tag.
312
for tag in filter(tagFilter, self.rdb.iterkeys()):
313
rdb[tag] = self.rdb[tag].copy()
315
res.db = reverse(rdb)
235
318
def hasPackage(self, pkg):
    """
    Check if the collection contains the given package.

    Returns True when `pkg` is a key of the package->tags table
    (self.db), False otherwise.
    """
    # The `in` operator replaces dict.has_key(), which is deprecated and
    # does not exist on Python 3 dictionaries; the result is the same.
    return pkg in self.db
346
429
return set(tags[:1])
433
def correlations(self):
    """
    Generate the list of correlations as tuples (hastag, hasalsotag, score).

    Every tuple indicates that the tag 'hastag' tends to also have
    'hasalsotag' with a score of 'score'.

    For each pivot tag the collection is split with filterPackagesTags()
    into the packages that carry the pivot and those that do not.  The
    score of another tag is card(tag) / packageCount() measured on the
    first group, minus the same ratio measured on the second group.

    When every package carries the pivot the second group is empty; its
    ratio is then taken to be 0.0 instead of dividing by zero.
    """
    for pivot in self.iterTags():
        # Packages that carry the pivot tag, and packages that do not.
        with_ = self.filterPackagesTags(lambda pt: pivot in pt[1])
        without = self.filterPackagesTags(lambda pt: pivot not in pt[1])
        # Both totals are invariant for this pivot: compute them once.
        with_total = float(with_.packageCount())
        without_total = float(without.packageCount())
        for tag in with_.iterTags():
            if tag == pivot:
                continue
            has = float(with_.card(tag)) / with_total
            if without_total:
                hasnt = float(without.card(tag)) / without_total
            else:
                # No package lacks the pivot: nothing to compare against.
                hasnt = 0.0
            yield pivot, tag, has - hasnt