~ubuntu-branches/ubuntu/jaunty/beagle/jaunty-security

« back to all changes in this revision

Viewing changes to beagled/LuceneQueryingDriver.cs

  • Committer: Bazaar Package Importer
  • Author(s): Stefan Ebner
  • Date: 2008-05-04 00:31:32 UTC
  • mfrom: (1.1.21 upstream)
  • Revision ID: james.westby@ubuntu.com-20080504003132-2tkm5o8moo5952ri
Tags: 0.3.7-2ubuntu1
 * Merge from Debian unstable. (LP: #225746) Remaining Ubuntu changes:
  - debian/control:
    + Rename ice{weasel,dove}-beagle to {mozilla,thunderbird}-beagle and
      update the dependencies accordingly.
    + Change Maintainer to Ubuntu Mono Team.
  - debian/rules:
    + Install the mozilla-beagle and thunderbird-beagle extensions.
  - ice{dove,weasel}.dirs:
    + Renamed to {mozilla,thunderbird}-beagle.dirs.
    + Fixed paths to point to usr/lib/{firefox,thunderbird}

Show diffs side-by-side

added added

removed removed

Lines of Context:
136
136
 
137
137
                ////////////////////////////////////////////////////////////////
138
138
 
139
 
                // Returns the lowest matching score before the results are
140
 
                // truncated.
141
 
                public void DoQuery (Query               query,
142
 
                                     IQueryResult        result,
143
 
                                     ICollection         search_subset_uris, // should be internal uris
144
 
                                     QueryPartHook       query_part_hook,
145
 
                                     UriFilter           uri_filter,
146
 
                                     HitFilter           hit_filter)
 
139
                // Returns the lists of terms in the query
 
140
                private ArrayList AssembleQuery (Query                  query,
 
141
                                                 QueryPartHook          query_part_hook,
 
142
                                                 HitFilter              hit_filter,
 
143
                                                 out ArrayList          primary_required_part_queries,
 
144
                                                 out ArrayList          secondary_required_part_queries,
 
145
                                                 out LNS.BooleanQuery   primary_prohibited_part_query,
 
146
                                                 out LNS.BooleanQuery   secondary_prohibited_part_query,
 
147
                                                 out AndHitFilter       all_hit_filters)
147
148
                {
148
 
                        if (Debug)
149
 
                                Logger.Log.Debug ("###### {0}: Starting low-level queries", IndexName);
150
 
 
151
 
                        Stopwatch total, a, b, c, d, e, f;
152
 
 
153
 
                        total = new Stopwatch ();
154
 
                        a = new Stopwatch ();
155
 
                        b = new Stopwatch ();
156
 
                        c = new Stopwatch ();
157
 
                        d = new Stopwatch ();
158
 
                        e = new Stopwatch ();
159
 
                        f = new Stopwatch ();
160
 
 
161
 
                        total.Start ();
162
 
                        a.Start ();
163
 
 
164
 
                        // Assemble all of the parts into a bunch of Lucene queries
165
 
 
166
 
                        ArrayList primary_required_part_queries = null;
167
 
                        ArrayList secondary_required_part_queries = null;
168
 
 
169
 
                        LNS.BooleanQuery primary_prohibited_part_query = null;
170
 
                        LNS.BooleanQuery secondary_prohibited_part_query = null;
171
 
 
172
 
                        AndHitFilter all_hit_filters;
 
149
                        primary_required_part_queries = null;
 
150
                        secondary_required_part_queries = null;
 
151
                        primary_prohibited_part_query = null;
 
152
                        secondary_prohibited_part_query = null;
 
153
 
173
154
                        all_hit_filters = new AndHitFilter ();
174
155
                        if (hit_filter != null)
175
156
                                all_hit_filters.Add (hit_filter);
209
190
                                case QueryPartLogic.Prohibited:
210
191
                                        if (primary_prohibited_part_query == null)
211
192
                                                primary_prohibited_part_query = new LNS.BooleanQuery ();
212
 
                                        primary_prohibited_part_query.Add (primary_part_query, false, false);
 
193
                                        primary_prohibited_part_query.Add (primary_part_query, LNS.BooleanClause.Occur.SHOULD);
213
194
 
214
195
                                        if (secondary_part_query != null) {
215
196
                                                if (secondary_prohibited_part_query == null)
216
197
                                                        secondary_prohibited_part_query = new LNS.BooleanQuery ();
217
 
                                                secondary_prohibited_part_query.Add (secondary_part_query, false, false);
 
198
                                                secondary_prohibited_part_query.Add (secondary_part_query, LNS.BooleanClause.Occur.SHOULD);
218
199
                                        }
219
200
 
220
201
                                        if (part_hit_filter != null) {
227
208
                                }
228
209
                        }
229
210
 
230
 
                        a.Stop ();
231
 
                        if (Debug)
232
 
                                Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);
233
 
 
234
 
                        // If we have no required parts, give up.
235
 
                        if (primary_required_part_queries == null)
236
 
                                return;
237
 
 
238
 
                        b.Start ();
239
 
                        
240
 
                        //
241
 
                        // Now that we have all of these nice queries, let's execute them!
242
 
                        //
243
 
 
244
 
                        // Create the searchers that we will need.
245
 
 
246
 
                        IndexReader primary_reader;
247
 
                        LNS.IndexSearcher primary_searcher;
248
 
                        IndexReader secondary_reader = null;
249
 
                        LNS.IndexSearcher secondary_searcher = null;
 
211
                        return term_list;
 
212
                }
 
213
 
 
214
                private void BuildSearchers (out IndexReader primary_reader,
 
215
                                            out LNS.IndexSearcher primary_searcher,
 
216
                                            out IndexReader secondary_reader,
 
217
                                            out LNS.IndexSearcher secondary_searcher)
 
218
                {
 
219
                        secondary_reader = null;
 
220
                        secondary_searcher = null;
250
221
 
251
222
                        primary_reader = LuceneCommon.GetReader (PrimaryStore);
252
223
                        primary_searcher = new LNS.IndexSearcher (primary_reader);
261
232
 
262
233
                        if (secondary_reader != null)
263
234
                                secondary_searcher = new LNS.IndexSearcher (secondary_reader);
264
 
 
265
 
                        b.Stop ();
266
 
                        if (Debug)
267
 
                                Log.Debug ("###### {0}: Readers/searchers built in {1}", IndexName, b);
268
 
 
269
 
                        // Build whitelists and blacklists for search subsets.
270
 
                        c.Start ();
271
 
                        
272
 
                        // Possibly create our whitelists from the search subset.
273
 
 
274
 
                        LuceneBitArray primary_whitelist = null;
275
 
                        LuceneBitArray secondary_whitelist = null;
 
235
                }
 
236
 
 
237
                private void CloseSearchers (IndexReader primary_reader,
 
238
                                            LNS.IndexSearcher primary_searcher,
 
239
                                            IndexReader secondary_reader,
 
240
                                            LNS.IndexSearcher secondary_searcher)
 
241
                {
 
242
                        primary_searcher.Close ();
 
243
                        if (secondary_searcher != null)
 
244
                                secondary_searcher.Close ();
 
245
                        ReleaseReader (primary_reader);
 
246
                        if (secondary_reader != null)
 
247
                                ReleaseReader (secondary_reader);
 
248
                }
 
249
 
 
250
                private void CreateQueryWhitelists (ICollection         search_subset_uris,
 
251
                                                    LNS.IndexSearcher   primary_searcher,
 
252
                                                    LNS.IndexSearcher   secondary_searcher,
 
253
                                                    LNS.BooleanQuery    primary_prohibited_part_query,
 
254
                                                    LNS.BooleanQuery    secondary_prohibited_part_query,
 
255
                                                    out LuceneBitArray  primary_whitelist,
 
256
                                                    out LuceneBitArray  secondary_whitelist)
 
257
                {
 
258
                        primary_whitelist = null;
 
259
                        secondary_whitelist = null;
276
260
                        
277
261
                        if (search_subset_uris != null && search_subset_uris.Count > 0) {
278
262
                                primary_whitelist = new LuceneBitArray (primary_searcher);
327
311
                                        secondary_whitelist.AndNot (secondary_blacklist);
328
312
                                }
329
313
                        }
 
314
                }
 
315
 
 
316
                ////////////////////////////////////////////////////////////////
 
317
 
 
318
                public int DoCountMatchQuery (Query query, QueryPartHook query_part_hook)
 
319
                {
 
320
                        if (Debug)
 
321
                                Logger.Log.Debug ("###### {0}: Starting low-level queries", IndexName);
 
322
 
 
323
                        Stopwatch total;
 
324
                        total = new Stopwatch ();
 
325
                        total.Start ();
 
326
 
 
327
                        ArrayList primary_required_part_queries;
 
328
                        ArrayList secondary_required_part_queries;
 
329
 
 
330
                        LNS.BooleanQuery primary_prohibited_part_query;
 
331
                        LNS.BooleanQuery secondary_prohibited_part_query;
 
332
 
 
333
                        AndHitFilter all_hit_filters;
 
334
 
 
335
                        ArrayList term_list;
 
336
                        term_list = AssembleQuery ( query,
 
337
                                                    query_part_hook,
 
338
                                                    null,
 
339
                                                    out primary_required_part_queries,
 
340
                                                    out secondary_required_part_queries,
 
341
                                                    out primary_prohibited_part_query,
 
342
                                                    out secondary_prohibited_part_query,
 
343
                                                    out all_hit_filters);
 
344
 
 
345
                        // If we have no required parts, give up.
 
346
                        if (primary_required_part_queries == null)
 
347
                                return 0;
 
348
 
 
349
                        IndexReader primary_reader;
 
350
                        LNS.IndexSearcher primary_searcher;
 
351
                        IndexReader secondary_reader;
 
352
                        LNS.IndexSearcher secondary_searcher;
 
353
 
 
354
                        BuildSearchers (out primary_reader, out primary_searcher, out secondary_reader, out secondary_searcher);
 
355
 
 
356
                        // Build whitelists and blacklists for search subsets.
 
357
                        LuceneBitArray primary_whitelist, secondary_whitelist;
 
358
                        CreateQueryWhitelists (null,
 
359
                                primary_searcher,
 
360
                                secondary_searcher,
 
361
                                primary_prohibited_part_query,
 
362
                                secondary_prohibited_part_query,
 
363
                                out primary_whitelist,
 
364
                                out secondary_whitelist);
 
365
 
 
366
                        // Now run the low level queries against our indexes.
 
367
                        BetterBitArray primary_matches = null;
 
368
                        if (primary_required_part_queries != null) {
 
369
 
 
370
                                if (secondary_searcher != null)
 
371
                                        primary_matches = DoRequiredQueries_TwoIndex (primary_searcher,
 
372
                                                                                      secondary_searcher,
 
373
                                                                                      primary_required_part_queries,
 
374
                                                                                      secondary_required_part_queries,
 
375
                                                                                      primary_whitelist,
 
376
                                                                                      secondary_whitelist);
 
377
                                else
 
378
                                        primary_matches = DoRequiredQueries (primary_searcher,
 
379
                                                                             primary_required_part_queries,
 
380
                                                                             primary_whitelist);
 
381
 
 
382
                        } 
 
383
 
 
384
                        int result = 0;
 
385
                        // FIXME: Pass the count through uri-filter and other validation checks
 
386
                        if (primary_matches != null)
 
387
                                result = primary_matches.TrueCount;
 
388
 
 
389
                        CloseSearchers (primary_reader, primary_searcher, secondary_reader, secondary_searcher);
 
390
 
 
391
                        total.Stop ();
 
392
                        if (Debug)
 
393
                                Logger.Log.Debug ("###### {0}: Total query run in {1}", IndexName, total);
 
394
 
 
395
                        return result;
 
396
                }
 
397
 
 
398
                ////////////////////////////////////////////////////////////////
 
399
 
 
400
                public void DoQuery (Query               query,
 
401
                                     IQueryResult        result,
 
402
                                     ICollection         search_subset_uris, // should be internal uris
 
403
                                     QueryPartHook       query_part_hook,
 
404
                                     UriFilter           uri_filter,
 
405
                                     HitFilter           hit_filter)
 
406
                {
 
407
                        if (Debug)
 
408
                                Logger.Log.Debug ("###### {0}: Starting low-level queries", IndexName);
 
409
 
 
410
                        Stopwatch total, a, b, c, d, e, f;
 
411
 
 
412
                        total = new Stopwatch ();
 
413
                        a = new Stopwatch ();
 
414
                        b = new Stopwatch ();
 
415
                        c = new Stopwatch ();
 
416
                        d = new Stopwatch ();
 
417
                        e = new Stopwatch ();
 
418
                        f = new Stopwatch ();
 
419
 
 
420
                        total.Start ();
 
421
                        a.Start ();
 
422
 
 
423
                        ArrayList primary_required_part_queries;
 
424
                        ArrayList secondary_required_part_queries;
 
425
 
 
426
                        LNS.BooleanQuery primary_prohibited_part_query;
 
427
                        LNS.BooleanQuery secondary_prohibited_part_query;
 
428
 
 
429
                        AndHitFilter all_hit_filters;
 
430
 
 
431
                        ArrayList term_list;
 
432
 
 
433
                        // Assemble all of the parts into a bunch of Lucene queries
 
434
 
 
435
                        term_list = AssembleQuery (query,
 
436
                                query_part_hook,
 
437
                                hit_filter,
 
438
                                out primary_required_part_queries,
 
439
                                out secondary_required_part_queries,
 
440
                                out primary_prohibited_part_query,
 
441
                                out secondary_prohibited_part_query,
 
442
                                out all_hit_filters);
 
443
 
 
444
                        a.Stop ();
 
445
                        if (Debug)
 
446
                                Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);
 
447
 
 
448
                        // If we have no required parts, give up.
 
449
                        if (primary_required_part_queries == null)
 
450
                                return;
 
451
 
 
452
                        b.Start ();
 
453
                        
 
454
                        //
 
455
                        // Now that we have all of these nice queries, let's execute them!
 
456
                        //
 
457
 
 
458
                        IndexReader primary_reader;
 
459
                        LNS.IndexSearcher primary_searcher;
 
460
                        IndexReader secondary_reader;
 
461
                        LNS.IndexSearcher secondary_searcher;
 
462
 
 
463
                        // Create the searchers that we will need.
 
464
 
 
465
                        BuildSearchers (out primary_reader, out primary_searcher, out secondary_reader, out secondary_searcher);
 
466
 
 
467
                        b.Stop ();
 
468
                        if (Debug)
 
469
                                Log.Debug ("###### {0}: Readers/searchers built in {1}", IndexName, b);
 
470
 
 
471
                        // Build whitelists and blacklists for search subsets.
 
472
                        c.Start ();
 
473
 
 
474
                        // Possibly create our whitelists from the search subset.
 
475
                        LuceneBitArray primary_whitelist, secondary_whitelist;
 
476
                        CreateQueryWhitelists (search_subset_uris,
 
477
                                primary_searcher,
 
478
                                secondary_searcher,
 
479
                                primary_prohibited_part_query,
 
480
                                secondary_prohibited_part_query,
 
481
                                out primary_whitelist,
 
482
                                out secondary_whitelist);
330
483
 
331
484
                        c.Stop ();
332
485
                        if (Debug)
361
514
                        // Only generate results if we got some matches
362
515
                        if (primary_matches != null && primary_matches.ContainsTrue ()) {
363
516
                                GenerateQueryResults (primary_reader,
364
 
                                                      primary_searcher,
365
 
                                                      secondary_searcher,
 
517
                                                      secondary_reader,
366
518
                                                      primary_matches,
367
519
                                                      result,
368
520
                                                      term_list,
382
534
                        //
383
535
 
384
536
                        f.Start ();
385
 
                        
386
 
                        primary_searcher.Close ();
387
 
                        if (secondary_searcher != null)
388
 
                                secondary_searcher.Close ();
389
 
                        ReleaseReader (primary_reader);
390
 
                        if (secondary_reader != null)
391
 
                                ReleaseReader (secondary_reader);
392
 
 
 
537
                        CloseSearchers (primary_reader, primary_searcher, secondary_reader, secondary_searcher);
393
538
                        f.Stop ();
394
539
                        
395
540
                        if (Debug)
426
571
                        LNS.BooleanQuery combined_query;
427
572
                        combined_query = new LNS.BooleanQuery ();
428
573
                        foreach (LNS.Query query in primary_queries)
429
 
                                combined_query.Add (query, true, false);
 
574
                                combined_query.Add (query, LNS.BooleanClause.Occur.MUST);
430
575
 
431
576
                        LuceneBitArray matches;
432
577
                        matches = new LuceneBitArray (primary_searcher, combined_query);
562
707
                        foreach (Term term in term_list) {
563
708
 
564
709
                                double idf;
565
 
                                idf = similarity.Ldf (reader.DocFreq (term), reader.MaxDoc ());
 
710
                                idf = similarity.Idf (reader.DocFreq (term), reader.MaxDoc ());
566
711
 
567
712
                                int hit_count;
568
713
                                hit_count = hits_by_id.Count;
598
743
                //
599
744
 
600
745
                // Two arrays we need for quickly creating lucene documents and check if they are valid
601
 
                static string[] fields_timestamp_uri = { "Timestamp", "Uri" };
602
 
                static string[] fields_uri = {"Uri"};
 
746
                static FieldSelector fields_timestamp_uri = new MapFieldSelector (new string[] {"Uri", "Timestamp"});
 
747
                static FieldSelector fields_uri = new MapFieldSelector (new string[] {"Uri"});
603
748
 
604
749
                private static void GenerateQueryResults (IndexReader       primary_reader,
605
 
                                                          LNS.IndexSearcher primary_searcher,
606
 
                                                          LNS.IndexSearcher secondary_searcher,
 
750
                                                          IndexReader       secondary_reader,
607
751
                                                          BetterBitArray    primary_matches,
608
752
                                                          IQueryResult      result,
609
753
                                                          ICollection       query_term_list,
641
785
 
642
786
                        if (primary_matches.TrueCount > max_results)
643
787
                                final_list_of_hits = ScanRecentDocs (primary_reader,
644
 
                                        primary_searcher,
645
 
                                        secondary_searcher,
 
788
                                        secondary_reader,
646
789
                                        primary_matches,
647
790
                                        hits_by_id,
648
791
                                        max_results,
651
794
 
652
795
                        if (final_list_of_hits == null)
653
796
                                final_list_of_hits = FindRecentResults (primary_reader,
654
 
                                        primary_searcher,
655
 
                                        secondary_searcher,
 
797
                                        secondary_reader,
656
798
                                        primary_matches,
657
799
                                        hits_by_id,
658
800
                                        max_results,
743
885
                // for all of them.
744
886
 
745
887
                private static ArrayList ScanRecentDocs (IndexReader        primary_reader,
746
 
                                                    LNS.IndexSearcher       primary_searcher,
747
 
                                                    LNS.IndexSearcher       secondary_searcher,
 
888
                                                    IndexReader             secondary_reader,
748
889
                                                    BetterBitArray          primary_matches,
749
890
                                                    Dictionary<int, Hit>    hits_by_id,
750
891
                                                    int                     max_results,
763
904
 
764
905
                        Term term;
765
906
                        TermDocs secondary_term_docs = null;
766
 
                        if (secondary_searcher != null)
767
 
                                secondary_term_docs = secondary_searcher.Reader.TermDocs ();
 
907
                        if (secondary_reader != null)
 
908
                                secondary_term_docs = secondary_reader.TermDocs ();
768
909
 
769
910
                        do {
770
911
                                term = enumerator.Term ();
780
921
                                        int doc_id = docs.Doc ();
781
922
 
782
923
                                        if (primary_matches.Get (doc_id)) {
783
 
                                                Document doc = primary_searcher.Doc (doc_id);
 
924
                                                Document doc = primary_reader.Document (doc_id);
784
925
                                                // If we have a UriFilter, apply it.
785
926
                                                if (uri_filter != null) {
786
927
                                                        Uri uri;
787
928
                                                        uri = GetUriFromDocument (doc);
788
929
                                                        if (uri_filter (uri)) {
789
 
                                                                Hit hit = CreateHit (doc, secondary_searcher, secondary_term_docs);
 
930
                                                                Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs);
790
931
                                                                hits_by_id [doc_id] = hit;
791
932
                                                                // Add the result, last modified first
792
933
                                                                results.Add (hit);
825
966
                }
826
967
 
827
968
                private static ArrayList   FindRecentResults (IndexReader           primary_reader,
828
 
                                                              LNS.IndexSearcher primary_searcher,
829
 
                                                              LNS.IndexSearcher     secondary_searcher,
 
969
                                                              IndexReader           secondary_reader,
830
970
                                                              BetterBitArray        primary_matches,
831
971
                                                              Dictionary<int, Hit>  hits_by_id,
832
972
                                                              int                   max_results,
848
988
                        else
849
989
                                all_docs = new ArrayList (primary_matches.TrueCount);
850
990
 
851
 
                        if (secondary_searcher != null)
852
 
                                term_docs = secondary_searcher.Reader.TermDocs ();
 
991
                        if (secondary_reader != null)
 
992
                                term_docs = secondary_reader.TermDocs ();
853
993
 
854
994
                        for (int match_index = primary_matches.Count; ; match_index --) {
855
995
                                // Walk across the matches backwards, since newer
861
1001
 
862
1002
                                count++;
863
1003
 
864
 
                                doc = primary_searcher.Doc (match_index, fields_timestamp_uri);
 
1004
                                doc = primary_reader.Document (match_index, fields_timestamp_uri);
865
1005
 
866
1006
                                // Check the timestamp --- if we have already reached our
867
1007
                                // limit, we might be able to reject it immediately.
887
1027
 
888
1028
                                // Get the actual hit now
889
1029
                                // doc was created with only 2 fields, so first get the complete lucene document for primary document
890
 
                                Hit hit = CreateHit (primary_searcher.Doc (match_index), secondary_searcher, term_docs);
 
1030
                                Hit hit = CreateHit (primary_reader.Document (match_index), secondary_reader, term_docs);
891
1031
                                hits_by_id [match_index] = hit;
892
1032
 
893
1033
                                // Add the document to the appropriate data structure.
917
1057
                }
918
1058
 
919
1059
                private static Hit CreateHit ( Document primary_doc,
920
 
                                        LNS.IndexSearcher secondary_searcher,
 
1060
                                        IndexReader secondary_reader,
921
1061
                                        TermDocs term_docs)
922
1062
                {
923
1063
                        Hit hit = DocumentToHit (primary_doc);
924
1064
 
925
 
                        if (secondary_searcher == null)
 
1065
                        if (secondary_reader == null)
926
1066
                                return hit;
927
1067
 
928
1068
                        // Get the stringified version of the URI
932
1072
 
933
1073
                        // Move to the first (and only) matching term doc
934
1074
                        term_docs.Next ();
935
 
                        Document secondary_doc = secondary_searcher.Doc (term_docs.Doc ());
 
1075
                        Document secondary_doc = secondary_reader.Document (term_docs.Doc ());
936
1076
 
937
1077
                        // If we are using the secondary index, now we need to
938
1078
                        // merge the properties from the secondary index