~mmcg069/software-center/bug855666

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
# Copyright (C) 2009 Canonical
#
# Authors:
#  Michael Vogt
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; version 3.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from gi.repository import GObject
import locale
import logging
import os
import re
import string
import threading
import xapian
from softwarecenter.db.application import Application

#from softwarecenter.utils import *
from softwarecenter.enums import (
    AVAILABLE_FOR_PURCHASE_MAGIC_CHANNEL_NAME,
    PkgStates,
    XapianValues
    )

from softwarecenter.paths import XAPIAN_BASE_PATH_SOFTWARE_CENTER_AGENT
from gettext import gettext as _

class SearchQuery(list):
    """ a list wrapper for a search query. it can take a single
        xapian.Query, an iterable of queries or None (which gives an
        empty query list)

        It provides __eq__/__ne__ to easily compare two search query
        lists: they are equal when the str() of their items match in
        the same order. Comparing with None is never equal.
    """
    def __init__(self, query_string_or_list):
        if query_string_or_list is None:
            # nothing to search for, stay an empty list
            pass
        # turn single queries into a single item list
        elif isinstance(query_string_or_list, xapian.Query):
            self.append(query_string_or_list)
        else:
            self.extend(query_string_or_list)
    def __eq__(self, other):
        # a missing query used to raise a TypeError when "other" was
        # iterated below; it is simply not equal to any query list
        if other is None:
            return False
        # turn single queries into a single item list
        if isinstance(other, xapian.Query):
            other = [other]
        q1 = [str(q) for q in self]
        q2 = [str(q) for q in other]
        return q1 == q2
    def __ne__(self, other):
        return not self.__eq__(other)
    def __repr__(self):
        return "[%s]" % ",".join([str(q) for q in self])

# LocaleSorter would ideally be declared as
#   class LocaleSorter(xapian.KeyMaker)
# but ubuntu maverick does not have the KeyMaker yet; maintain
# compatibility for now by falling back to the old xapian.Sorter
if hasattr(xapian, "KeyMaker"):
    parentClass = xapian.KeyMaker
else:
    parentClass = xapian.Sorter
class LocaleSorter(parentClass):
    """ Sort in a locale friendly way by running the display name of
        a document through locale.strxfrm
    """
    def __init__(self, db):
        super(LocaleSorter, self).__init__()
        # __call__ reads the "display_name" entry of db._axi_values
        self.db = db
    def __call__(self, doc):
        value_slot = self.db._axi_values["display_name"]
        display_name = doc.get_value(value_slot)
        return locale.strxfrm(display_name)

class TopRatedSorter(xapian.KeyMaker):
    """ Sort using the top rated data

        The key of a document is the serialised dampened rating that
        the review loader reports for its application; documents
        without review stats get the key for a rating of 0.
    """
    def __init__(self, db, review_loader):
        super(TopRatedSorter, self).__init__()
        self.db = db
        self.review_loader = review_loader
    def __call__(self, doc):
        app = Application(self.db.get_appname(doc),
                          self.db.get_pkgname(doc))
        stats = self.review_loader.get_review_stats(app)
        # xapian is imported at module level already, there is no need
        # to import it again for every document that gets sorted
        if stats:
            return xapian.sortable_serialise(stats.dampened_rating)
        return xapian.sortable_serialise(0)

def parse_axi_values_file(filename="/var/lib/apt-xapian-index/values"):
    """ parse the apt-xapian-index "values" file and return a dict
    that maps the value names to their (integer) value number

    Everything after a "#" is a comment and blank lines are skipped.
    Only the first two whitespace separated fields of a line (name and
    number) are used, additional fields are ignored.

    Returns an empty dict if filename does not exist. Raises a
    ValueError if a line has no number field or if the number is not
    an integer.
    """
    axi_values = {}
    if not os.path.exists(filename):
        return axi_values
    # the context manager closes the file again, it was leaked before
    with open(filename) as values_file:
        for raw_line in values_file:
            # str.split() instead of the deprecated string.split()
            fields = raw_line.split("#", 1)[0].split()
            if not fields:
                continue
            # be tolerant about extra columns after "name number"
            (key, value) = fields[:2]
            axi_values[key] = int(value)
    return axi_values

class StoreDatabase(GObject.GObject):
    """thin abstraction for the xapian database with convenient functions"""

    # TRANSLATORS: List of "grey-listed" words separated with ";"
    # Do not translate this list directly. Instead,
    # provide a list of words in your language that people are likely
    # to include in a search but that should normally be ignored in
    # the search.
    SEARCH_GREYLIST_STR = _("app;application;package;program;programme;"
                            "suite;tool")

    # signals emitted
    __gsignals__ = {"reopen" : (GObject.SIGNAL_RUN_FIRST,
                                GObject.TYPE_NONE,
                                ()),
                    "open" : (GObject.SIGNAL_RUN_FIRST,
                              GObject.TYPE_NONE,
                              (GObject.TYPE_STRING,)),
                    }
    def __init__(self, pathname, cache):
        """ set up the wrapper, the database is not opened here

            pathname -- path of the xapian database
            cache -- stored as self._aptcache
        """
        GObject.GObject.__init__(self)
        self._logger = logging.getLogger("softwarecenter.db")
        self._aptcache = cache
        self._db_pathname = pathname
        self._additional_databases = []
        # the xapian values as read from /var/lib/apt-xapian-index/values
        self._axi_values = {}
        # a xapian.Database is not thread safe (it is however safe to
        # have multiple xapian.Databases). if this lock becomes a
        # bottleneck it needs to be replaced with a solution that
        # creates a DB per search query
        self._search_lock = threading.Lock()

    def acquire_search_lock(self):
        """ acquire the search lock of this database """
        lock = self._search_lock
        lock.acquire()

    def release_search_lock(self):
        """ release the search lock of this database """
        lock = self._search_lock
        lock.release()

    def open(self, pathname=None, use_axi=True, use_agent=True):
        """ open the database

            pathname -- optional new path of the main xapian database,
                        it is stored and used by later open() calls
            use_axi -- also add the apt-xapian-index database
            use_agent -- also add the software-center-agent database

            A failure to add the apt-xapian-index or the agent database
            is logged and otherwise ignored. The "open" signal is
            emitted with the database path at the end.
        """
        if pathname:
            self._db_pathname = pathname
        self.xapiandb = xapian.Database(self._db_pathname)
        # counts the databases that are added to the main one below
        self.nr_databases = 0
        self._use_axi = use_axi
        self._use_agent = use_agent
        if use_axi:
            try:
                axi = xapian.Database("/var/lib/apt-xapian-index/index")
                self.xapiandb.add_database(axi)
                self._axi_values = parse_axi_values_file()
                self.nr_databases += 1
            except Exception:
                # no bare "except:" here, that would also swallow
                # KeyboardInterrupt and SystemExit
                self._logger.exception("failed to add apt-xapian-index")
        if use_agent:
            try:
                sca = xapian.Database(XAPIAN_BASE_PATH_SOFTWARE_CENTER_AGENT)
                self.xapiandb.add_database(sca)
                self.nr_databases += 1
            except Exception as e:
                # use the logger of this class like the a-x-i code above
                self._logger.warning("failed to add sca db %s" % e)
        # additional dbs
        for db in self._additional_databases:
            self.xapiandb.add_database(db)
            self.nr_databases += 1
        # parser etc
        self.xapian_parser = xapian.QueryParser()
        self.xapian_parser.set_database(self.xapiandb)
        self.xapian_parser.add_boolean_prefix("pkg", "XP")
        self.xapian_parser.add_boolean_prefix("pkg", "AP")
        self.xapian_parser.add_boolean_prefix("mime", "AM")
        self.xapian_parser.add_boolean_prefix("section", "XS")
        self.xapian_parser.add_boolean_prefix("origin", "XOC")
        self.xapian_parser.add_prefix("pkg_wildcard", "XP")
        self.xapian_parser.add_prefix("pkg_wildcard", "AP")
        self.xapian_parser.set_default_op(xapian.Query.OP_AND)
        self.emit("open", self._db_pathname)

    def add_database(self, database):
        """ add an additional xapian database to the open database and
            remember it in the list of additional databases
        """
        extra_dbs = self._additional_databases
        extra_dbs.append(database)
        self.xapiandb.add_database(database)

    def del_database(self, database):
        """ remove the database from the list of additional databases

            Only the list is changed here, the currently open xapian
            database is not touched.
        """
        extra_dbs = self._additional_databases
        extra_dbs.remove(database)

    def schema_version(self):
        """Return the version of the database layout, read from the
           "db-schema-version" metadata of the database

           This is useful to ensure we force a rebuild if it is
           older than what we expect
        """
        metadata_key = "db-schema-version"
        return self.xapiandb.get_metadata(metadata_key)

    def reopen(self):
        """ reopen the database with the use_axi/use_agent settings of
            the previous open() and emit the "reopen" signal
        """
        use_axi = self._use_axi
        use_agent = self._use_agent
        self.open(use_axi=use_axi, use_agent=use_agent)
        self.emit("reopen")

    @property
    def popcon_max(self):
        """ the unserialised "popcon_max_desktop" metadata value of the
            database, asserted to be greater than zero
        """
        serialised = self.xapiandb.get_metadata("popcon_max_desktop")
        value = xapian.sortable_unserialise(serialised)
        assert value > 0
        return value

    def get_query_list_from_search_entry(self, search_term, category_query=None):
        """ get a SearchQuery (a list of xapian.Query) from a search
            term string and limit the search to the given category

            search_term -- the search string
            category_query -- optional xapian.Query, if given it is
                              ANDed with every returned query

            For an empty or single letter search the result holds one
            query, otherwise it holds a package name query followed by
            a fuzzy query from the xapian query parser.
        """
        def _add_category_to_query(query):
            """ helper that adds the current category to the query"""
            if not category_query:
                return query
            return xapian.Query(xapian.Query.OP_AND,
                                category_query,
                                query)
        # empty query returns a query that matches nothing (for performance
        # reasons)
        if search_term == "" and category_query is None:
            return SearchQuery(xapian.Query())
        # we cheat and return a match-all query for single letter searches
        if len(search_term) < 2:
            return SearchQuery(_add_category_to_query(xapian.Query("")))

        # check if there is a ":" in the search, if so, it means the user
        # is using a xapian prefix like "pkg:" or "mime:" and in this case
        # we do not want to alter the search term (as application is in the
        # greylist but a common mime-type prefix)
        if not ":" in search_term:
            # filter query by greylist (to avoid overly generic search terms)
            orig_search_term = search_term
            for item in self.SEARCH_GREYLIST_STR.split(";"):
                # escape the (translatable) word so that it is matched
                # literally and can not break the regular expression
                (search_term, n) = re.subn('\\b%s\\b' % re.escape(item),
                                           '', search_term)
                if n:
                    self._logger.debug("greylist changed search term: '%s'" % search_term)
            # restore query if it was just greylist words. removing the
            # words leaves the whitespace between them behind, so the
            # check must ignore whitespace ("app tool" becomes " ")
            if search_term.strip() == '':
                self._logger.debug("grey-list replaced all terms, restoring")
                search_term = orig_search_term
        # we have to strip the leading and trailing whitespaces to avoid having
        # different results for e.g. 'font ' and 'font' (LP: #506419)
        search_term = search_term.strip()
        # get a pkg query
        pkg_query = xapian.Query()
        for term in search_term.split():
            pkg_query = xapian.Query(xapian.Query.OP_OR,
                                     xapian.Query("XP"+term),
                                     pkg_query)
        pkg_query = _add_category_to_query(pkg_query)

        # get a search query
        if not ':' in search_term: # ie, not a mimetype query
            # we need this to work around xapian oddness
            search_term = search_term.replace('-','_')
        fuzzy_query = self.xapian_parser.parse_query(search_term,
                                               xapian.QueryParser.FLAG_PARTIAL|
                                               xapian.QueryParser.FLAG_BOOLEAN)
        # if the query size goes out of hand, omit the FLAG_PARTIAL
        # (LP: #634449)
        if fuzzy_query.get_length() > 1000:
            fuzzy_query = self.xapian_parser.parse_query(search_term,
                                            xapian.QueryParser.FLAG_BOOLEAN)
        # now add categories
        fuzzy_query = _add_category_to_query(fuzzy_query)
        return SearchQuery([pkg_query,fuzzy_query])

    def get_matches_from_query(self, query, start=0, end=-1, category=None):
        """ run the query and return the resulting xapian mset

            query -- a xapian.Query or a str. A str is parsed with the
                     xapian parser, the empty str is turned into
                     xapian.Query("")
            start, end -- passed on to enquire.get_mset(), an end of -1
                          is replaced with the doc count of the database
            category -- optional, its "query" attribute is ANDed with
                        the query
        """
        enquire = xapian.Enquire(self.xapiandb)
        if isinstance(query, str):
            query = (self.xapian_parser.parse_query(query) if query != ""
                     else xapian.Query(""))
        if category:
            query = xapian.Query(xapian.Query.OP_AND, category.query, query)
        enquire.set_query(query)
        # NOTE(review): "end" goes in as the second get_mset() argument
        # unchanged - confirm that callers expect xapian to treat it as
        # a maximum number of items and not as an end index
        last = len(self) if end == -1 else end
        return enquire.get_mset(start, last)

    def get_docs_from_query(self, query, start=0, end=-1, category=None):
        """ same as get_matches_from_query() but return a list with the
            document of every match
        """
        docs = []
        for match in self.get_matches_from_query(query, start, end, category):
            docs.append(match.document)
        return docs

    def get_spelling_correction(self, search_term):
        """ return the corrected query string of the xapian parser for
            the given search_term
        """
        has_prefix = ':' in search_term # ie, a mimetype query
        if not has_prefix:
            # we need this to work around xapian oddness
            search_term = search_term.replace('-','_')
        flags = xapian.QueryParser.FLAG_SPELLING_CORRECTION
        # the parsed query itself is not used, only the correction that
        # the parser provides after parsing
        self.xapian_parser.parse_query(search_term, flags)
        return self.xapian_parser.get_corrected_query_string()

    def get_most_popular_applications_for_mimetype(self, mimetype,
                                                  only_uninstalled=True, num=3):
        """ return a list with up to num Application objects for the
            given mimetype, sorted by popcon value

            only_uninstalled -- if True, only applications whose
                                pkg_state is PkgStates.UNINSTALLED are
                                added to the list
        """
        enquire = xapian.Enquire(self.xapiandb)
        # sort by popularity by default
        enquire.set_sort_by_value_then_relevance(XapianValues.POPCON)
        # query mimetype
        enquire.set_query(xapian.Query("AM%s"%mimetype))
        apps = []
        # mset just needs to be "big enough"
        for match in enquire.get_mset(0, 100):
            doc = match.document
            app = Application(self.get_appname(doc),self.get_pkgname(doc),
                              popcon=self.get_popcon(doc))
            wanted = True
            if only_uninstalled:
                state = app.get_details(self).pkg_state
                wanted = (state == PkgStates.UNINSTALLED)
            if wanted:
                apps.append(app)
            if len(apps) == num:
                break
        return apps

    def get_summary(self, doc):
        """ get the human readable summary of the given document

            The summary value of the document is used if there is one.
            Otherwise, if the apt cache is ready, the summary of the
            candidate version of the package is returned.
        """
        summary = doc.get_value(XapianValues.SUMMARY)
        channel = doc.get_value(XapianValues.ARCHIVE_CHANNEL)
        if summary or not self._aptcache.ready:
            return summary
        # we do not have the summary in the xapian db, get it from the
        # apt cache
        pkgname = self.get_pkgname(doc)
        if pkgname in self._aptcache:
            candidate = self._aptcache[pkgname].candidate
            if candidate:
                return candidate.summary
        if channel:
            # FIXME: print something if available for our arch
            pass
        return summary

    def get_pkgname(self, doc):
        """ Return the packagename of a xapian document """
        pkgname = doc.get_value(XapianValues.PKGNAME)
        if pkgname:
            return pkgname
        # no value means that the document is from the apt-xapian-index
        # that stores the pkgname in the data field or as a value.
        # the doc says that get_value() is quicker than get_data() so
        # that is used if we have an updated DB, otherwise fall back to
        # the old way (the xapian DB may not be rebuilt yet)
        if self._axi_values and "pkgname" in self._axi_values:
            return doc.get_value(self._axi_values["pkgname"])
        return doc.get_data()

    def get_appname(self, doc):
        """ Return the appname value of a xapian document

            NOTE(review): this used to be documented as returning None
            if there is no appname - confirm what doc.get_value() gives
            for a value that is not set
        """
        appname = doc.get_value(XapianValues.APPNAME)
        return appname

    def get_iconname(self, doc):
        """ Return the icon value of the xapian document """
        return doc.get_value(XapianValues.ICON)

    def pkg_in_category(self, pkgname, cat_query):
        """ Return True if the given pkg is in the given category

            pkgname -- looked up with both the "AP" and the "XP" prefix
            cat_query -- the xapian.Query of the category
        """
        pkg_query1 = xapian.Query("AP"+pkgname)
        pkg_query2 = xapian.Query("XP"+pkgname)
        pkg_query = xapian.Query(xapian.Query.OP_OR, pkg_query1, pkg_query2)
        pkg_and_cat_query = xapian.Query(xapian.Query.OP_AND, pkg_query, cat_query)
        enquire = xapian.Enquire(self.xapiandb)
        enquire.set_query(pkg_and_cat_query)
        # only the existence of a match matters, so a single item is
        # enough (instead of asking for up to len(self) items)
        matches = enquire.get_mset(0, 1)
        return len(matches) > 0

    def get_apps_for_pkgname(self, pkgname):
        """ Return a set with the docids of all postlist entries for
            the "AP" term of the given pkgname """
        postings = self.xapiandb.postlist("AP"+pkgname)
        return set(posting.docid for posting in postings)
        
    def get_icon_download_url(self, doc):
        """ Return the icon url value of the xapian document """
        return doc.get_value(XapianValues.ICON_URL)

    def get_popcon(self, doc):
        """ Return the popcon value of a xapian document, 0 if the
            document has no popcon value
        """
        popcon_raw = doc.get_value(XapianValues.POPCON)
        if not popcon_raw:
            return 0
        return xapian.sortable_unserialise(popcon_raw)

    def get_xapian_document(self, appname, pkgname):
        """ Get the matching xapian document for appname, pkgname

        If no document is found, raise an IndexError
        """
        # search the app-install-data namespace, first for the appname
        # and then for the pkgname. only a document with the right
        # pkgname value is accepted
        for (prefix, name) in (("AA", appname), ("AP", pkgname)):
            for posting in self.xapiandb.postlist(prefix+name):
                doc = self.xapiandb.get_document(posting.docid)
                if doc.get_value(XapianValues.PKGNAME) == pkgname:
                    return doc
        # then look for matching packages from a-x-i, the first one wins
        for posting in self.xapiandb.postlist("XP"+pkgname):
            return self.xapiandb.get_document(posting.docid)
        # no matching document found
        raise IndexError("No app '%s' for '%s' in database" % (appname,pkgname))

    def is_appname_duplicated(self, appname):
        """Check if the given appname is stored multiple times in the db
           This can happen for generic names like "Terminal"
        """
        seen_one = False
        for posting in self.xapiandb.postlist("AA"+appname):
            if seen_one:
                # this is the second entry for the appname
                return True
            seen_one = True
        return False

    def get_installed_purchased_packages(self):
        """ return a set() of packagenames of purchased apps that are
            currently installed
        """
        for_purchase_query = xapian.Query(
            "AH" + AVAILABLE_FOR_PURCHASE_MAGIC_CHANNEL_NAME)
        enquire = xapian.Enquire(self.xapiandb)
        enquire.set_query(for_purchase_query)
        installed = set()
        for match in enquire.get_mset(0, self.xapiandb.get_doccount()):
            pkgname = self.get_pkgname(match.document)
            if pkgname not in self._aptcache:
                continue
            if self._aptcache[pkgname].is_installed:
                installed.add(pkgname)
        return installed

    def get_origins_from_db(self):
        """ return a list of all origins in the current database, taken
            from the terms with the "XOO" prefix (duplicates and empty
            names are dropped)
        """
        names = (entry.term[3:] for entry in self.xapiandb.allterms("XOO"))
        return list(set(name for name in names if name))

    def get_exact_matches(self, pkgnames=None):
        """ Returns a list of fake MSetItems, one for every pkgname. If
            the pkgname is available, then MSetItem.document is the
            proper xapian document of the pkgname. If the pkgname is not
            available, then MSetItem.document is actually an Application.

            pkgnames -- iterable of package names, a name may carry a
                        request that is separated with a "?". None (the
                        default) is the same as an empty list.
        """
        # None instead of a (shared) mutable list as the default value
        if pkgnames is None:
            pkgnames = []
        matches = []
        for pkgname in pkgnames:
            parts = pkgname.split('?')
            app = Application('', parts[0])
            if len(parts) > 1:
                app.request = parts[1]
            # fall back to the Application if there is no document
            match = app
            # a document from the "AP" postlist wins over a "XP" one
            for m in self.xapiandb.postlist("XP"+app.pkgname):
                match = self.xapiandb.get_document(m.docid)
            for m in self.xapiandb.postlist("AP"+app.pkgname):
                match = self.xapiandb.get_document(m.docid)
            matches.append(FakeMSetItem(match))
        return matches

    def __len__(self):
        """ the doc count of the database """
        doc_count = self.xapiandb.get_doccount()
        return doc_count

    def __iter__(self):
        """ support iterating over the documents: yields the document
            of every entry in the postlist of the empty term
        """
        for posting in self.xapiandb.postlist(""):
            yield self.xapiandb.get_document(posting.docid)

class FakeMSetItem():
    """ minimal stand-in for a xapian MSetItem, it only provides the
        "document" attribute
    """
    def __init__(self, doc):
        # doc is stored as it is
        self.document = doc

if __name__ == "__main__":
    # manual test: search for the first command line argument (or for
    # "apt,apport"), print the data of all matching documents and then
    # the number of matches for the "Ubuntu" origin
    import apt
    import sys

    db = StoreDatabase("/var/cache/software-center/xapian", apt.Cache())
    db.open()

    if len(sys.argv) < 2:
        search = "apt,apport"
    else:
        search = sys.argv[1]
    query_list = db.get_query_list_from_search_entry(search)
    print(query_list)
    # get_query_list_from_search_entry() returns a SearchQuery (a list
    # of xapian.Query) but set_query() takes a single query, so the
    # queries are run one after the other
    for query in query_list:
        enquire = xapian.Enquire(db.xapiandb)
        enquire.set_query(query)
        matches = enquire.get_mset(0, len(db))
        for m in matches:
            doc = m.document
            print(doc.get_data())

    # test origin
    query = xapian.Query("XOL"+"Ubuntu")
    enquire = xapian.Enquire(db.xapiandb)
    enquire.set_query(query)
    matches = enquire.get_mset(0, len(db))
    # single argument print() works as statement and as function
    print("Ubuntu origin:  %s" % len(matches))