~ubuntu-branches/ubuntu/jaunty/beagle/jaunty-security

« back to all changes in this revision

Viewing changes to beagled/LuceneCommon.cs

  • Committer: Bazaar Package Importer
  • Author(s): Stefan Ebner
  • Date: 2008-05-04 00:31:32 UTC
  • mfrom: (1.1.21 upstream)
  • Revision ID: james.westby@ubuntu.com-20080504003132-2tkm5o8moo5952ri
Tags: 0.3.7-2ubuntu1
 * Merge from Debian unstable. (LP: #225746) Remaining Ubuntu changes:
  - debian/control:
    + Rename ice{weasel,dove}-beagle to {mozilla,thunderbird}-beagle and
      update the dependencies accordingly.
    + Change Maintainer to Ubuntu Mono Team.
  - debian/rules:
    + Install the mozilla-beagle and thunderbird-beagle extensions.
  - ice{dove,weasel}.dirs:
    + Renamed to {mozilla,thunderbird}-beagle.dirs.
    + Fixed paths to point to usr/lib/{firefox,thunderbird}

Show diffs side-by-side

added added

removed removed

Lines of Context:
26
26
 
27
27
using System;
28
28
using System.Collections;
 
29
using System.Collections.Generic;
29
30
using System.Diagnostics;
30
31
using System.Globalization;
31
32
using System.IO;
42
43
using Lucene.Net.QueryParsers;
43
44
using LNS = Lucene.Net.Search;
44
45
 
 
46
using SF.Snowball.Ext;
 
47
using SnowballProgram = SF.Snowball.SnowballProgram;
 
48
 
45
49
using Beagle.Util;
46
50
 
47
51
namespace Beagle.Daemon {
83
87
                // 18: add IsPersistent to properties, and adjust coded values
84
88
                //     in AddPropertyToDocument() and GetPropertyFromDocument();
85
89
                //     changed subdate field format rules for better readability
86
 
                private const int MAJOR_VERSION = 18;
 
90
                // 19: Update lucene and analyzer (lucene.net-2.1)
 
91
                private const int MAJOR_VERSION = 19;
87
92
                private int minor_version = 0;
88
93
 
89
94
                private string index_name;
101
106
                private Lucene.Net.Store.Directory secondary_store = null;
102
107
 
103
108
                // Flush if more than this number of requests
104
 
                public const int RequestFlushThreshold = 37; // a total arbitrary magic number
 
109
                public const int RequestFlushThreshold = Lucene.Net.Index.IndexWriter.DEFAULT_MAX_BUFFERED_DOCS; // Use same value as Lucene's flush threshold
105
110
 
106
111
                //////////////////////////////////////////////////////////////////////////////
107
112
 
336
341
                                // http://mail-archives.apache.org/mod_mbox/lucene-java-user/200504.mbox/%3c4265767B.5090307@getopt.org%3e
337
342
                                enumerator = reader.Terms ();
338
343
 
339
 
                                while (enumerator.Next ()) {
 
344
                                do {
340
345
                                        Term term = enumerator.Term ();
 
346
                                        if (term == null)
 
347
                                                break;
 
348
 
341
349
                                        positions = reader.TermPositions (term);
342
350
 
343
351
                                        while (positions.Next ()) {
348
356
                                        }
349
357
                                        positions.Close ();
350
358
                                        positions = null;
351
 
                                }
 
359
                                } while (enumerator.Next ());
352
360
 
353
361
                                enumerator.Close ();
354
362
                                enumerator = null;
379
387
 
380
388
                        // Create a new store.
381
389
                        Lucene.Net.Store.Directory store;
382
 
                        store = Lucene.Net.Store.FSDirectory.GetDirectory (path, LockDirectory, true);
 
390
                        store = Lucene.Net.Store.FSDirectory.GetDirectory (path, new Lucene.Net.Store.SimpleFSLockFactory (LockDirectory));
383
391
 
384
392
                        // Create an empty index in that store.
385
393
                        IndexWriter writer;
437
445
                        reader.Close ();
438
446
 
439
447
                        // Create stores for our indexes.
440
 
                        primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, LockDirectory, false, read_only_mode);
441
 
                        secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, LockDirectory, false, read_only_mode);
 
448
                        // Use separate lock factories since each lock factory is tied to the index directory
 
449
                        if (read_only_mode) {
 
450
                                primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, Lucene.Net.Store.NoLockFactory.GetNoLockFactory ());
 
451
                                secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, Lucene.Net.Store.NoLockFactory.GetNoLockFactory ());
 
452
                        } else {
 
453
                                primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, new Lucene.Net.Store.SimpleFSLockFactory (LockDirectory));
 
454
                                secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, new Lucene.Net.Store.SimpleFSLockFactory (LockDirectory));
 
455
                        }
442
456
                }
443
457
 
444
458
                ////////////////////////////////////////////////////////////////
471
485
                }
472
486
 
473
487
                // FIXME: This assumes everything being indexed is in English!
474
 
                internal class BeagleAnalyzer : StandardAnalyzer {
 
488
                public class BeagleAnalyzer : StandardAnalyzer {
475
489
 
 
490
                        const string DEFAULT_STEMMER_LANGUAGE = "English";
476
491
                        private char [] buffer = new char [2];
477
492
                        private bool strip_extra_property_info = false;
478
493
                        private bool tokenize_email_hostname = false;
479
 
                        const string DEFAULT_STEMMER = "English";
480
494
 
481
495
                        public BeagleAnalyzer (bool is_indexing_analyzer)
482
496
                        {
533
547
                                    || fieldName == "PropertyText"
534
548
                                    || is_text_prop) {
535
549
                                        outstream = new NoiseEmailHostFilter (outstream, tokenize_email_hostname);
536
 
                                        outstream = new SnowballFilter (outstream, DEFAULT_STEMMER);
 
550
                                        // Sharing Stemmer is not thread safe.
 
551
                                        // Currently our underlying lucene indexing is not done in multiple threads.
 
552
                                        StemmerInfo stemmer_info = GetStemmer (DEFAULT_STEMMER_LANGUAGE);
 
553
                                        outstream = new SnowballFilter (outstream, stemmer_info.Stemmer, stemmer_info.StemMethod);
537
554
                                }
538
555
 
539
556
                                return outstream;
1033
1050
                // Access to the stemmer and list of stop words
1034
1051
                //
1035
1052
 
1036
 
                static SF.Snowball.Ext.EnglishStemmer stemmer = new SF.Snowball.Ext.EnglishStemmer ();
 
1053
                private static Dictionary<string, StemmerInfo> stemmer_table = new Dictionary<string, StemmerInfo> ();
 
1054
 
 
1055
                class StemmerInfo {
 
1056
                        internal SnowballProgram Stemmer;
 
1057
                        internal System.Reflection.MethodInfo StemMethod;
 
1058
                }
 
1059
 
 
1060
                private static StemmerInfo GetStemmer (System.String name)
 
1061
                {
 
1062
                        if (! stemmer_table.ContainsKey (name)) {
 
1063
                                StemmerInfo stemmer_info = new StemmerInfo ();
 
1064
 
 
1065
                                // Taken from Snowball/SnowballFilter.cs
 
1066
                                System.Type stemClass = System.Type.GetType ("SF.Snowball.Ext." + name + "Stemmer", true);
 
1067
                                SnowballProgram stemmer = (SnowballProgram) System.Activator.CreateInstance (stemClass);
 
1068
                                // why doesn't the SnowballProgram class have an (abstract?) stem method?
 
1069
                                System.Reflection.MethodInfo stemMethod = stemClass.GetMethod ("Stem", (new System.Type [0] == null) ? new System.Type [0] : (System.Type []) new System.Type [0]);
 
1070
 
 
1071
                                stemmer_info.Stemmer = stemmer;
 
1072
                                stemmer_info.StemMethod = stemMethod;
 
1073
                                stemmer_table [name] = stemmer_info;
 
1074
                        }
 
1075
 
 
1076
                        return stemmer_table [name];
 
1077
                }
 
1078
 
 
1079
                private static SF.Snowball.Ext.EnglishStemmer default_stemmer = new SF.Snowball.Ext.EnglishStemmer ();
1037
1080
 
1038
1081
                static public string Stem (string str)
1039
1082
                {
1040
1083
                        string stemmed_str;
1041
1084
 
1042
 
                        lock (stemmer) {
1043
 
                                stemmer.SetCurrent (str);
1044
 
                                stemmer.Stem ();
1045
 
                                stemmed_str = stemmer.GetCurrent ();
1046
 
                                stemmer.SetCurrent (String.Empty);
 
1085
                        lock (default_stemmer) {
 
1086
                                default_stemmer.SetCurrent (str);
 
1087
                                default_stemmer.Stem ();
 
1088
                                stemmed_str = default_stemmer.GetCurrent ();
1047
1089
                        }
1048
1090
 
1049
1091
                        return stemmed_str;
1332
1374
                                if (d1 != 1 || d2 != DateTime.DaysInMonth (y2, m2)) {
1333
1375
                                        LNS.BooleanQuery sub_query;
1334
1376
                                        sub_query = new LNS.BooleanQuery ();
1335
 
                                        sub_query.Add (ym_query, true, false);
1336
 
                                        sub_query.Add (NewDayQuery (field_name, d1, d2), true, false);
1337
 
                                        top_level_query.Add (sub_query, false, false);
 
1377
                                        sub_query.Add (ym_query, LNS.BooleanClause.Occur.MUST);
 
1378
                                        sub_query.Add (NewDayQuery (field_name, d1, d2), LNS.BooleanClause.Occur.MUST);
 
1379
                                        top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
1338
1380
                                } else {
1339
 
                                        top_level_query.Add (ym_query, false, false);
 
1381
                                        top_level_query.Add (ym_query, LNS.BooleanClause.Occur.SHOULD);
1340
1382
                                }
1341
1383
 
1342
1384
                        } else {
1345
1387
                                if (d1 > 1) {
1346
1388
                                        LNS.BooleanQuery sub_query;
1347
1389
                                        sub_query = new LNS.BooleanQuery ();
1348
 
                                        sub_query.Add (NewYearMonthQuery (field_name, y1, m1), true, false);
1349
 
                                        sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), true, false);
1350
 
                                        top_level_query.Add (sub_query, false, false);
 
1390
                                        sub_query.Add (NewYearMonthQuery (field_name, y1, m1), LNS.BooleanClause.Occur.MUST);
 
1391
                                        sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), LNS.BooleanClause.Occur.MUST);
 
1392
                                        top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
1351
1393
                                        
1352
1394
                                        ++m1;
1353
1395
                                        if (m1 == 13) {
1360
1402
                                if (d2 < DateTime.DaysInMonth (y2, m2)) {
1361
1403
                                        LNS.BooleanQuery sub_query;
1362
1404
                                        sub_query = new LNS.BooleanQuery ();
1363
 
                                        sub_query.Add (NewYearMonthQuery (field_name, y2, m2), true, false);
1364
 
                                        sub_query.Add (NewDayQuery (field_name, 1, d2), true, false);
1365
 
                                        top_level_query.Add (sub_query, false, false);
 
1405
                                        sub_query.Add (NewYearMonthQuery (field_name, y2, m2), LNS.BooleanClause.Occur.MUST);
 
1406
                                        sub_query.Add (NewDayQuery (field_name, 1, d2), LNS.BooleanClause.Occur.MUST);
 
1407
                                        top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
1366
1408
 
1367
1409
                                        --m2;
1368
1410
                                        if (m2 == 0) {
1374
1416
                                // Generate the query for the "middle" of our period, if it is non-empty
1375
1417
                                if (y1 < y2 || ((y1 == y2) && m1 <= m2))
1376
1418
                                        top_level_query.Add (NewYearMonthQuery (field_name, y1, m1, y2, m2),
1377
 
                                                             false, false);
 
1419
                                                             LNS.BooleanClause.Occur.SHOULD);
1378
1420
                        }
1379
1421
                                
1380
1422
                        return top_level_query;
1432
1474
                                        LNS.Query subquery;
1433
1475
                                        subquery = StringToQuery ("Text", part.Text, term_list);
1434
1476
                                        if (subquery != null) {
1435
 
                                                p_query.Add (subquery, false, false);
 
1477
                                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1436
1478
                                                added_subquery = true;
1437
1479
                                        }
1438
1480
 
1439
1481
                                        // FIXME: HotText is ignored for now!
1440
1482
                                        // subquery = StringToQuery ("HotText", part.Text);
1441
1483
                                        // if (subquery != null) {
1442
 
                                        //    p_query.Add (subquery, false, false);
 
1484
                                        //    p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1443
1485
                                        //    added_subquery = true;
1444
1486
                                        // }
1445
1487
                                }
1448
1490
                                        LNS.Query subquery;
1449
1491
                                        subquery = StringToQuery ("PropertyText", part.Text, term_list);
1450
1492
                                        if (subquery != null) {
1451
 
                                                p_query.Add (subquery, false, false);
 
1493
                                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1452
1494
                                                // Properties can live in either index
1453
1495
                                                if (! only_build_primary_query)
1454
 
                                                        s_query.Add (subquery.Clone () as LNS.Query, false, false);
 
1496
                                                        s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
1455
1497
                                                added_subquery = true;
1456
1498
                                        }
1457
1499
 
1482
1524
                                                if (term_list != null)
1483
1525
                                                        term_list.Add (term);
1484
1526
                                                subquery = new LNS.TermQuery (term);
1485
 
                                                p_query.Add (subquery, false, false);
 
1527
                                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1486
1528
                                                // Properties can live in either index
1487
1529
                                                if (! only_build_primary_query)
1488
 
                                                        s_query.Add (subquery.Clone () as LNS.Query, false, false);
 
1530
                                                        s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
1489
1531
                                        } else {
1490
1532
                                                // Reset these so we return a null query
1491
1533
                                                p_query = null;
1515
1557
                                // Search text content
1516
1558
                                term = new Term ("Text", query_string_lower);
1517
1559
                                subquery = new LNS.WildcardQuery (term);
1518
 
                                p_query.Add (subquery, false, false);
 
1560
                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1519
1561
                                term_list.Add (term);
1520
1562
 
1521
1563
                                // Search text properties
1522
1564
                                term = new Term ("PropertyText", query_string_lower);
1523
1565
                                subquery = new LNS.WildcardQuery (term);
1524
 
                                p_query.Add (subquery, false, false);
 
1566
                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1525
1567
                                // Properties can live in either index
1526
1568
                                if (! only_build_primary_query)
1527
 
                                        s_query.Add (subquery.Clone () as LNS.Query, false, false);
 
1569
                                        s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
1528
1570
                                term_list.Add (term);
1529
1571
 
1530
1572
                                // Search property keywords
1531
1573
                                term = new Term ("PropertyKeyword", query_string_lower);
1532
1574
                                term_list.Add (term);
1533
1575
                                subquery = new LNS.WildcardQuery (term);
1534
 
                                p_query.Add (subquery, false, false);
 
1576
                                p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1535
1577
                                // Properties can live in either index
1536
1578
                                if (! only_build_primary_query)
1537
 
                                        s_query.Add (subquery.Clone () as LNS.Query, false, false);
 
1579
                                        s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
1538
1580
 
1539
1581
                                primary_query = p_query;
1540
1582
                                if (! only_build_primary_query)
1587
1629
                                                          term_list, query_part_hook,
1588
1630
                                                          out p_subq, out s_subq, out sub_hit_filter);
1589
1631
                                        if (p_subq != null)
1590
 
                                                p_query.Add (p_subq, false, false);
 
1632
                                                p_query.Add (p_subq, LNS.BooleanClause.Occur.SHOULD);
1591
1633
                                        if (s_subq != null)
1592
 
                                                s_query.Add (s_subq, false, false);
 
1634
                                                s_query.Add (s_subq, LNS.BooleanClause.Occur.SHOULD);
1593
1635
                                        if (sub_hit_filter != null) {
1594
1636
                                                if (or_hit_filter == null)
1595
1637
                                                        or_hit_filter = new OrHitFilter ();
1678
1720
 
1679
1721
                        int cursor = 0;
1680
1722
                        if (extra_requirement != null) {
1681
 
                                top_query.Add (extra_requirement, true, false);
 
1723
                                top_query.Add (extra_requirement, LNS.BooleanClause.Occur.MUST);
1682
1724
                                ++cursor;
1683
1725
                        }
1684
1726
 
1690
1732
                                        LNS.BooleanQuery bq;
1691
1733
                                        bq = new LNS.BooleanQuery ();
1692
1734
                                        bottom_queries.Add (bq);
1693
 
                                        top_query.Add (bq, false, false);
 
1735
                                        top_query.Add (bq, LNS.BooleanClause.Occur.SHOULD);
1694
1736
                                }
1695
1737
                        }
1696
1738
 
1708
1750
                                                cursor = 0;
1709
1751
                                }
1710
1752
                                
1711
 
                                target.Add (subquery, false, false);
 
1753
                                target.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1712
1754
                        }
1713
1755
 
1714
1756
                        return top_query;