67
52
('no"', '["no""]'),
68
53
("'no", "[\"'no\"]"),
69
54
("no'", "[\"no'\"]"),
70
('"no\'', '[""no\'"]')]:
55
('"no\'', '[""no\'"]')
71
57
result = parser.parse_query(query)
72
58
assert str(result) == wanted
74
60
def testQueryParserExceptions(self):
75
61
""" search: test the query parser """
76
62
parser = QueryParser()
79
py.test.raises(QueryError, parser.parse_query, q)
64
py.test.raises(ValueError, parser.parse_query, q)
81
65
for query in ['""', '(', ')', '(a or b']:
85
class BaseSearchTest(object):
86
69
""" search: test search """
87
70
doesnotexist = u'jfhsdaASDLASKDJ'
89
# key - page name, value - page content. If the value is None, the page
# will not be created but will still be used for searching. Use None
# for pages which already exist.
92
pages = {u'SearchTestPage': u'this is a test page',
93
u'SearchTestLinks': u'SearchTestPage',
94
u'SearchTestLinksLowerCase': u'searchtestpage',
95
u'SearchTestOtherLinks': u'SearchTestLinks',
96
u'TestEdit': u'TestEdit',
97
u'TestOnEditing': u'another test page',
98
u'ContentSearchUpper': u'Find the NEEDLE in the haystack.',
99
u'ContentSearchLower': u'Find the needle in the haystack.',
100
u'LanguageSetup': None,
101
u'CategoryHomepage': None,
102
u'HomePageWiki': None,
104
u'RecentChanges': None,
105
u'HelpOnCreoleSyntax': None,
109
searcher_class = None
111
def _index_update(self):
115
def setup_class(cls):
116
request = cls.request
117
become_trusted(request)
119
for page, text in cls.pages.iteritems():
121
create_page(request, page, text)
123
def teardown_class(self):
124
for page, text in self.pages.iteritems():
126
nuke_page(self.request, page)
128
def get_searcher(self, query):
    """ Return a searcher object for *query*.

    This base implementation always raises NotImplementedError; the
    search() method calls get_searcher(query).run(), so a usable
    implementation has to return an object with a run() method.
    """
    raise NotImplementedError
131
def search(self, query):
    """ Run *query* and return the result of the searcher's run().

    A query given as str or unicode is first parsed with
    QueryParser().parse_query(); any other object is handed to
    get_searcher() unchanged.
    """
    if isinstance(query, (str, unicode)):
        query = QueryParser().parse_query(query)

    return self.get_searcher(query).run()
137
def test_title_search_simple(self):
138
searches = {u'title:SearchTestPage': 1,
139
u'title:LanguageSetup': 1,
140
u'title:HelpIndex': 1,
143
u'title:SearchTestNotExisting': 0,
144
u'title:FrontPage': 1,
145
u'title:TestOnEditing': 1,
148
def test(query, res_count):
149
result = self.search(query)
150
test_result = len(result.hits)
151
assert test_result == res_count
153
for query, res_count in searches.iteritems():
154
yield query, test, query, res_count
156
def test_title_search_re(self):
157
expected_pages = set([u'SearchTestPage', u'SearchTestLinks', u'SearchTestLinksLowerCase', u'SearchTestOtherLinks', ])
158
result = self.search(ur'-domain:underlay -domain:system title:re:\bSearchTest')
159
found_pages = set([hit.page_name for hit in result.hits])
160
assert found_pages == expected_pages
162
result = self.search(ur'-domain:underlay -domain:system title:re:\bSearchTest\b')
163
found_pages = set([hit.page_name for hit in result.hits])
164
assert not found_pages
166
def test_title_search_case(self):
    """ search: case sensitive title search """
    # Exact-case title: only SearchTestPage is expected.
    expected_pages = set([u'SearchTestPage', ])
    result = self.search(u'-domain:underlay -domain:system title:case:SearchTestPage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages

    # Lower-cased title: expect no hits.
    result = self.search(u'-domain:underlay -domain:system title:case:searchtestpage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert not found_pages
176
def test_title_search_case_re(self):
177
expected_pages = set([u'SearchTestPage', ])
178
result = self.search(ur'-domain:underlay -domain:system title:case:re:\bSearchTestPage\b')
179
found_pages = set([hit.page_name for hit in result.hits])
180
assert found_pages == expected_pages
182
result = self.search(ur'-domain:underlay -domain:system title:case:re:\bsearchtestpage\b')
183
found_pages = set([hit.page_name for hit in result.hits])
184
assert not found_pages
186
def test_linkto_search_simple(self):
    """ search: linkto search for an existing and a missing target """
    # Existing target: only SearchTestLinks is expected.
    expected_pages = set([u'SearchTestLinks', ])
    result = self.search(u'-domain:underlay -domain:system linkto:SearchTestPage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages

    # Target that does not exist: expect no hits.
    result = self.search(u'-domain:underlay -domain:system linkto:SearchTestNotExisting')
    found_pages = set(hit.page_name for hit in result.hits)
    assert not found_pages
196
def test_linkto_search_re(self):
197
expected_pages = set([u'SearchTestLinks', u'SearchTestOtherLinks', ])
198
result = self.search(ur'-domain:underlay -domain:system linkto:re:\bSearchTest')
199
found_pages = set([hit.page_name for hit in result.hits])
200
assert found_pages == expected_pages
202
result = self.search(ur'-domain:underlay -domain:system linkto:re:\bSearchTest\b')
203
found_pages = set([hit.page_name for hit in result.hits])
204
assert not found_pages
206
def test_linkto_search_case(self):
    """ search: case sensitive linkto search """
    # Exact-case target: only SearchTestLinks is expected.
    expected_pages = set([u'SearchTestLinks', ])
    result = self.search(u'-domain:underlay -domain:system linkto:case:SearchTestPage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages

    # Lower-cased target: expect no hits.
    result = self.search(u'-domain:underlay -domain:system linkto:case:searchtestpage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert not found_pages
216
def test_linkto_search_case_re(self):
217
expected_pages = set([u'SearchTestLinks', ])
218
result = self.search(ur'-domain:underlay -domain:system linkto:case:re:\bSearchTestPage\b')
219
found_pages = set([hit.page_name for hit in result.hits])
220
assert found_pages == expected_pages
222
result = self.search(ur'-domain:underlay -domain:system linkto:case:re:\bsearchtestpage\b')
223
found_pages = set([hit.page_name for hit in result.hits])
224
assert not found_pages
226
def test_category_search_simple(self):
    """ search: category search for an existing and a missing category """
    # Existing category: only HomePageWiki is expected.
    expected_pages = set([u'HomePageWiki', ])
    result = self.search(u'category:CategoryHomepage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages

    # Category that does not exist: expect no hits.
    result = self.search(u'category:CategorySearchTestNotExisting')
    found_pages = set(hit.page_name for hit in result.hits)
    assert not found_pages
236
def test_category_search_re(self):
237
expected_pages = set([u'HomePageWiki', ])
238
result = self.search(ur'category:re:\bCategoryHomepage\b')
239
found_pages = set([hit.page_name for hit in result.hits])
240
assert found_pages == expected_pages
242
result = self.search(ur'category:re:\bCategoryHomepa\b')
243
found_pages = set([hit.page_name for hit in result.hits])
244
assert not found_pages
246
def test_category_search_case(self):
    """ search: case sensitive category search """
    # Exact-case category: only HomePageWiki is expected.
    expected_pages = set([u'HomePageWiki', ])
    result = self.search(u'category:case:CategoryHomepage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages

    # Lower-cased category: expect no hits.
    result = self.search(u'category:case:categoryhomepage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert not found_pages
256
def test_category_search_case_re(self):
257
expected_pages = set([u'HomePageWiki', ])
258
result = self.search(ur'category:case:re:\bCategoryHomepage\b')
259
found_pages = set([hit.page_name for hit in result.hits])
260
assert found_pages == expected_pages
262
result = self.search(ur'category:case:re:\bcategoryhomepage\b')
263
found_pages = set([hit.page_name for hit in result.hits])
264
assert not found_pages
266
def test_mimetype_search_simple(self):
    """ search: mimetype search for text/wiki returns 14 hits """
    result = self.search(u'mimetype:text/wiki')
    test_result = len(result.hits)
    assert test_result == 14
271
def test_mimetype_search_re(self):
272
result = self.search(ur'mimetype:re:\btext/wiki\b')
273
test_result = len(result.hits)
274
assert test_result == 14
276
result = self.search(ur'category:re:\bCategoryHomepa\b')
277
found_pages = set([hit.page_name for hit in result.hits])
278
assert not found_pages
280
def test_language_search_simple(self):
    """ search: language search for "en" returns 14 hits """
    result = self.search(u'language:en')
    test_result = len(result.hits)
    assert test_result == 14
285
def test_domain_search_simple(self):
286
result = self.search(u'domain:system')
289
def test_search_and(self):
72
def testTitleSearchFrontPage(self):
73
""" search: title search for FrontPage """
74
result = search.searchPages(self.request, u"title:FrontPage")
75
assert len(result.hits) == 1
77
def testTitleSearchAND(self):
290
78
""" search: title search with AND expression """
291
expected_pages = set([u'HelpOnCreoleSyntax', ])
292
result = self.search(u"title:HelpOnCreoleSyntax lang:en")
293
found_pages = set([hit.page_name for hit in result.hits])
294
assert found_pages == expected_pages
296
result = self.search(u"title:HelpOnCreoleSyntax lang:de")
297
found_pages = set([hit.page_name for hit in result.hits])
298
assert not found_pages
300
result = self.search(u"title:Help title:%s" % self.doesnotexist)
301
found_pages = set([hit.page_name for hit in result.hits])
302
assert not found_pages
79
result = search.searchPages(self.request, u"title:Help title:Index")
80
assert len(result.hits) == 1
304
82
def testTitleSearchOR(self):
305
83
""" search: title search with OR expression """
306
expected_pages = set([u'FrontPage', u'RecentChanges', ])
307
result = self.search(u"title:FrontPage or title:RecentChanges")
308
found_pages = set([hit.page_name for hit in result.hits])
309
assert found_pages == expected_pages
84
result = search.searchPages(self.request, u"title:FrontPage or title:RecentChanges")
85
assert len(result.hits) == 2
311
87
def testTitleSearchNegatedFindAll(self):
312
88
""" search: negated title search for some pagename that does not exist results in all pagenames """
313
result = self.search(u"-title:%s" % self.doesnotexist)
314
n_pages = len(self.pages)
315
test_result = len(result.hits)
316
assert test_result == n_pages
89
result = search.searchPages(self.request, u"-title:%s" % self.doesnotexist)
90
assert len(result.hits) > 100 # XXX should be "all"
318
92
def testTitleSearchNegativeTerm(self):
319
93
""" search: title search for a AND expression with a negative term """
320
result = self.search(u"-title:FrontPage")
321
found_pages = set([hit.page_name for hit in result.hits])
322
assert u'FrontPage' not in found_pages
323
test_result = len(result.hits)
324
n_pages = len(self.pages) - 1
325
assert test_result == n_pages
327
result = self.search(u"-title:HelpOn")
328
test_result = len(result.hits)
329
n_pages = len(self.pages) - 1
330
assert test_result == n_pages
94
helpon_count = len(search.searchPages(self.request, u"title:HelpOn").hits)
95
result = search.searchPages(self.request, u"title:HelpOn -title:Acl")
96
assert len(result.hits) == helpon_count - 1 # finds all HelpOn* except one
332
98
def testFullSearchNegatedFindAll(self):
333
99
""" search: negated full search for some string that does not exist results in all pages """
334
result = self.search(u"-%s" % self.doesnotexist)
335
test_result = len(result.hits)
336
n_pages = len(self.pages)
337
assert test_result == n_pages
339
def testFullSearchRegexCaseInsensitive(self):
    """ search: full search for regular expression (case insensitive) """
    search_re = 'ne{2}dle' # matches 'NEEDLE' or 'needle' or ...
    # Both content pages are expected, upper- and lower-case spelling.
    expected_pages = set(['ContentSearchUpper', 'ContentSearchLower', ])
    result = self.search(u'-domain:underlay -domain:system re:%s' % search_re)
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages
347
def testFullSearchRegexCaseSensitive(self):
    """ search: full search for regular expression (case sensitive) """
    search_re = 'ne{2}dle' # matches 'needle'
    # Only the page with the lower-case spelling is expected.
    expected_pages = set(['ContentSearchLower', ])
    result = self.search(u'-domain:underlay -domain:system re:case:%s' % search_re)
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages
355
def test_title_search(self):
    """ search: query parsed with titlesearch=True finds FrontPage only """
    expected_pages = set(['FrontPage', ])
    # The query is parsed here, so search() receives a query object, not a string.
    query = QueryParser(titlesearch=True).parse_query('FrontPage')
    result = self.search(query)
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages
362
def test_create_page(self):
    """ search: a page is found after creation and no longer found after removal """
    page_name = 'TestCreatePage'
    expected_pages = set([u'TestCreatePage', ])
    self.pages[page_name] = 'some text' # Moin search must search this page

    create_page(self.request, page_name, self.pages[page_name])

    result = self.search(u'-domain:underlay -domain:system TestCreatePage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages

    nuke_page(self.request, page_name)

    # Drop the temporary entry again so self.pages is back to its prior content.
    del self.pages[page_name]
    result = self.search(u'-domain:underlay -domain:system TestCreatePage')
    found_pages = set(hit.page_name for hit in result.hits)
    assert not found_pages
379
def test_attachment(self):
    """ search: an attachment file name is found only while its page exists """
    page_name = u'TestAttachment'
    self.pages[page_name] = 'some text' # Moin search must search this page

    filename = "AutoCreatedSillyAttachmentForSearching.png"
    data = "Test content"
    filecontent = StringIO.StringIO(data)

    # Before the page and attachment exist, the file name must not be found.
    result = self.search(filename)
    found_attachments = set((hit.page_name, hit.attachment) for hit in result.hits)
    assert not found_attachments

    create_page(self.request, page_name, self.pages[page_name])
    AttachFile.add_attachment(self.request, page_name, filename, filecontent, True)
    append_page(self.request, page_name, '[[attachment:%s]]' % filename)

    result = self.search(filename)
    found_attachments = set((hit.page_name, hit.attachment) for hit in result.hits)
    assert (page_name, '') in found_attachments
    assert 1 <= len(found_attachments) <= 2
    # Note: moin search returns (page_name, '') as only result
    #       xapian search returns 2 results: (page_name, '') and (page_name, filename)
    # TODO: make behaviour the same, if possible

    nuke_page(self.request, page_name)
    del self.pages[page_name]

    # After removal the file name must not be found any more.
    result = self.search(filename)
    found_attachments = set((hit.page_name, hit.attachment) for hit in result.hits)
    assert not found_attachments
411
def test_get_searcher(self):
    """ search: _get_searcher returns an instance of self.searcher_class """
    assert isinstance(_get_searcher(self.request, ''), self.searcher_class)
415
class TestMoinSearch(BaseSearchTest):
    """ search: test Moin search """
    searcher_class = MoinSearch

    def get_searcher(self, query):
        """ Return a MoinSearch for *query* over the pages listed in self.pages. """
        # One descriptor per page name in self.pages, all without attachment
        # and with wikiname 'Self'.
        pages = [{'pagename': page, 'attachment': '', 'wikiname': 'Self', } for page in self.pages]
        return MoinSearch(self.request, query, pages=pages)

    def test_stemming(self):
        """ search: title:edit finds TestEdit and TestOnEditing, title:editing only TestOnEditing """
        expected_pages = set([u'TestEdit', u'TestOnEditing', ])
        result = self.search(u"title:edit")
        found_pages = set(hit.page_name for hit in result.hits)
        assert found_pages == expected_pages

        expected_pages = set([u'TestOnEditing', ])
        result = self.search(u"title:editing")
        found_pages = set(hit.page_name for hit in result.hits)
        assert found_pages == expected_pages
435
class TestXapianSearch(BaseSearchTest):
436
""" search: test Xapian indexing / search """
438
class Config(wikiconfig.Config):
441
def _index_update(self):
    """ Process all queued Xapian index updates before returning. """
    # for xapian, we queue index updates so they can get indexed later.
    # here we make sure the queue will be processed completely,
    # before we continue:
    from MoinMoin.search.Xapian import XapianIndex
    XapianIndex(self.request).do_queued_updates()
448
def get_searcher(self, query):
    """ Return a XapianSearch for *query* bound to this test's request. """
    # Imported here, inside the method, rather than at module level.
    from MoinMoin.search.Xapian.search import XapianSearch
    return XapianSearch(self.request, query)
452
def get_moin_search_connection(self):
    """ Return the search connection of a XapianIndex built for self.request. """
    from MoinMoin.search.Xapian import XapianIndex
    return XapianIndex(self.request).get_search_connection()
456
def setup_class(self):
458
from MoinMoin.search.Xapian import XapianIndex
459
from MoinMoin.search.Xapian.search import XapianSearch
460
self.searcher_class = XapianSearch
462
except ImportError, error:
463
if not str(error).startswith('Xapian '):
465
py.test.skip('xapian is not installed')
467
nuke_xapian_index(self.request)
468
index = XapianIndex(self.request)
469
# Additionally, pages which were not created but supposed to be searched
471
pages_to_index = [page for page in self.pages if not self.pages[page]]
472
index.indexPages(mode='add', pages=pages_to_index)
474
super(TestXapianSearch, self).setup_class()
476
def teardown_class(self):
    """ Call nuke_xapian_index() for this class's request. """
    nuke_xapian_index(self.request)
479
def test_get_all_documents(self):
    """ search: the index holds len(self.pages) documents, each for a known page """
    connection = self.get_moin_search_connection()
    documents = connection.get_all_documents()
    n_pages = len(self.pages)
    test_result = len(documents)
    assert test_result == n_pages
    for document in documents:
        # Membership is checked on the dict itself; no key list is built.
        assert document.data['pagename'][0] in self.pages
488
def test_xapian_term(self):
489
parser = QueryParser()
490
connection = self.get_moin_search_connection()
492
prefixes = {u'': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'),
493
u'title:': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'),
494
u'linkto:': ([u'', u're:', u'case:', u'case:re:'], u'FrontPage'),
495
u'category:': ([u'', u're:', u'case:', u'case:re:'], u'CategoryHomepage'),
496
u'mimetype:': ([u'', u're:'], u'text/wiki'),
497
u'language:': ([u''], u'en'),
498
u'domain:': ([u''], u'system'),
501
def test_query(query):
502
query_ = parser.parse_query(query).xapian_term(self.request, connection)
504
assert not query_.empty()
506
for prefix, data in prefixes.iteritems():
507
modifiers, term = data
508
for modifier in modifiers:
509
query = ''.join([prefix, modifier, term])
510
yield query, test_query, query
512
def test_stemming(self):
    """ search: title:edit finds only TestEdit, title:editing only TestOnEditing """
    expected_pages = set([u'TestEdit', ])
    result = self.search(u"title:edit")
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages

    expected_pages = set([u'TestOnEditing', ])
    result = self.search(u"title:editing")
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages
524
class TestXapianSearchStemmed(TestXapianSearch):
525
""" search: test Xapian indexing / search - with stemming enabled """
527
class Config(wikiconfig.Config):
529
xapian_stemming = True
531
def test_stemming(self):
    """ search: both title:edit and title:editing should find TestEdit and TestOnEditing """
    # The skip call comes first, ahead of every assertion below.
    py.test.skip("TODO fix TestXapianSearchStemmed - strange effects with stemming")

    expected_pages = set([u'TestEdit', u'TestOnEditing', ])
    result = self.search(u"title:edit")
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages

    expected_pages = set([u'TestEdit', u'TestOnEditing', ])
    result = self.search(u"title:editing")
    found_pages = set(hit.page_name for hit in result.hits)
    assert found_pages == expected_pages
545
class TestGetSearcher(object):
547
class Config(wikiconfig.Config):
550
def test_get_searcher(self):
    """ search: _get_searcher must return a MoinSearch instance here """
    assert isinstance(_get_searcher(self.request, ''), MoinSearch), 'Xapian index is not created, despite the configuration, MoinSearch must be used!'
100
result = search.searchPages(self.request, u"-%s" % self.doesnotexist)
101
assert len(result.hits) > 100 # XXX should be "all"
103
def testFullSearchNegativeTerm(self):
    """ search: full search for a AND expression with a negative term """
    # Baseline: number of hits for the positive term alone.
    helpon_count = len(search.searchPages(self.request, u"HelpOn").hits)
    result = search.searchPages(self.request, u"HelpOn -ACL")
    # The negative term must remove some hits, but not all of them.
    assert 0 < len(result.hits) < helpon_count
553
110
coverage_modules = ['MoinMoin.search']