83
87
// 18: add IsPersistent to properties, and adjust coded values
84
88
// in AddPropertyToDocument() and GetPropertyFromDocument();
85
89
// changed subdate field format rules for better readability
86
private const int MAJOR_VERSION = 18;
90
// 19: Update lucene and analyzer (lucene.net-2.1)
91
private const int MAJOR_VERSION = 19;
87
92
private int minor_version = 0;
89
94
private string index_name;
101
106
private Lucene.Net.Store.Directory secondary_store = null;
103
108
// Flush if more than this number of requests
104
public const int RequestFlushThreshold = 37; // a total arbitrary magic number
109
public const int RequestFlushThreshold = Lucene.Net.Index.IndexWriter.DEFAULT_MAX_BUFFERED_DOCS; // Use same value as Lucene's flush threshold
106
111
//////////////////////////////////////////////////////////////////////////////
336
341
// http://mail-archives.apache.org/mod_mbox/lucene-java-user/200504.mbox/%3c4265767B.5090307@getopt.org%3e
337
342
enumerator = reader.Terms ();
339
while (enumerator.Next ()) {
340
345
Term term = enumerator.Term ();
341
349
positions = reader.TermPositions (term);
343
351
while (positions.Next ()) {
380
388
// Create a new store.
381
389
Lucene.Net.Store.Directory store;
382
store = Lucene.Net.Store.FSDirectory.GetDirectory (path, LockDirectory, true);
390
store = Lucene.Net.Store.FSDirectory.GetDirectory (path, new Lucene.Net.Store.SimpleFSLockFactory (LockDirectory));
384
392
// Create an empty index in that store.
385
393
IndexWriter writer;
439
447
// Create stores for our indexes.
440
primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, LockDirectory, false, read_only_mode);
441
secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, LockDirectory, false, read_only_mode);
448
// Use separate lock factories since each lock factory is tied to the index directory
449
if (read_only_mode) {
450
primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, Lucene.Net.Store.NoLockFactory.GetNoLockFactory ());
451
secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, Lucene.Net.Store.NoLockFactory.GetNoLockFactory ());
453
primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, new Lucene.Net.Store.SimpleFSLockFactory (LockDirectory));
454
secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, new Lucene.Net.Store.SimpleFSLockFactory (LockDirectory));
444
458
////////////////////////////////////////////////////////////////
473
487
// FIXME: This assumes everything being indexed is in English!
474
internal class BeagleAnalyzer : StandardAnalyzer {
488
public class BeagleAnalyzer : StandardAnalyzer {
490
const string DEFAULT_STEMMER_LANGUAGE = "English";
476
491
private char [] buffer = new char [2];
477
492
private bool strip_extra_property_info = false;
478
493
private bool tokenize_email_hostname = false;
479
const string DEFAULT_STEMMER = "English";
481
495
public BeagleAnalyzer (bool is_indexing_analyzer)
533
547
|| fieldName == "PropertyText"
534
548
|| is_text_prop) {
535
549
outstream = new NoiseEmailHostFilter (outstream, tokenize_email_hostname);
536
outstream = new SnowballFilter (outstream, DEFAULT_STEMMER);
550
// Sharing Stemmer is not thread safe.
551
// Currently our underlying lucene indexing is not done in multiple threads.
552
StemmerInfo stemmer_info = GetStemmer (DEFAULT_STEMMER_LANGUAGE);
553
outstream = new SnowballFilter (outstream, stemmer_info.Stemmer, stemmer_info.StemMethod);
539
556
return outstream;
1033
1050
// Access to the stemmer and list of stop words
1036
static SF.Snowball.Ext.EnglishStemmer stemmer = new SF.Snowball.Ext.EnglishStemmer ();
1053
private static Dictionary<string, StemmerInfo> stemmer_table = new Dictionary<string, StemmerInfo> ();
1056
internal SnowballProgram Stemmer;
1057
internal System.Reflection.MethodInfo StemMethod;
1060
private static StemmerInfo GetStemmer (System.String name)
1062
if (! stemmer_table.ContainsKey (name)) {
1063
StemmerInfo stemmer_info = new StemmerInfo ();
1065
// Taken from Snowball/SnowballFilter.cs
1066
System.Type stemClass = System.Type.GetType ("SF.Snowball.Ext." + name + "Stemmer", true);
1067
SnowballProgram stemmer = (SnowballProgram) System.Activator.CreateInstance (stemClass);
1068
// why doesn't the SnowballProgram class have an (abstract?) stem method?
1069
System.Reflection.MethodInfo stemMethod = stemClass.GetMethod ("Stem", (new System.Type [0] == null) ? new System.Type [0] : (System.Type []) new System.Type [0]);
1071
stemmer_info.Stemmer = stemmer;
1072
stemmer_info.StemMethod = stemMethod;
1073
stemmer_table [name] = stemmer_info;
1076
return stemmer_table [name];
1079
private static SF.Snowball.Ext.EnglishStemmer default_stemmer = new SF.Snowball.Ext.EnglishStemmer ();
1038
1081
static public string Stem (string str)
1040
1083
string stemmed_str;
1043
stemmer.SetCurrent (str);
1045
stemmed_str = stemmer.GetCurrent ();
1046
stemmer.SetCurrent (String.Empty);
1085
lock (default_stemmer) {
1086
default_stemmer.SetCurrent (str);
1087
default_stemmer.Stem ();
1088
stemmed_str = default_stemmer.GetCurrent ();
1049
1091
return stemmed_str;
1332
1374
if (d1 != 1 || d2 != DateTime.DaysInMonth (y2, m2)) {
1333
1375
LNS.BooleanQuery sub_query;
1334
1376
sub_query = new LNS.BooleanQuery ();
1335
sub_query.Add (ym_query, true, false);
1336
sub_query.Add (NewDayQuery (field_name, d1, d2), true, false);
1337
top_level_query.Add (sub_query, false, false);
1377
sub_query.Add (ym_query, LNS.BooleanClause.Occur.MUST);
1378
sub_query.Add (NewDayQuery (field_name, d1, d2), LNS.BooleanClause.Occur.MUST);
1379
top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
1339
top_level_query.Add (ym_query, false, false);
1381
top_level_query.Add (ym_query, LNS.BooleanClause.Occur.SHOULD);
1346
1388
LNS.BooleanQuery sub_query;
1347
1389
sub_query = new LNS.BooleanQuery ();
1348
sub_query.Add (NewYearMonthQuery (field_name, y1, m1), true, false);
1349
sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), true, false);
1350
top_level_query.Add (sub_query, false, false);
1390
sub_query.Add (NewYearMonthQuery (field_name, y1, m1), LNS.BooleanClause.Occur.MUST);
1391
sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), LNS.BooleanClause.Occur.MUST);
1392
top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
1353
1395
if (m1 == 13) {
1360
1402
if (d2 < DateTime.DaysInMonth (y2, m2)) {
1361
1403
LNS.BooleanQuery sub_query;
1362
1404
sub_query = new LNS.BooleanQuery ();
1363
sub_query.Add (NewYearMonthQuery (field_name, y2, m2), true, false);
1364
sub_query.Add (NewDayQuery (field_name, 1, d2), true, false);
1365
top_level_query.Add (sub_query, false, false);
1405
sub_query.Add (NewYearMonthQuery (field_name, y2, m2), LNS.BooleanClause.Occur.MUST);
1406
sub_query.Add (NewDayQuery (field_name, 1, d2), LNS.BooleanClause.Occur.MUST);
1407
top_level_query.Add (sub_query, LNS.BooleanClause.Occur.SHOULD);
1374
1416
// Generate the query for the "middle" of our period, if it is non-empty
1375
1417
if (y1 < y2 || ((y1 == y2) && m1 <= m2))
1376
1418
top_level_query.Add (NewYearMonthQuery (field_name, y1, m1, y2, m2),
1419
LNS.BooleanClause.Occur.SHOULD);
1380
1422
return top_level_query;
1432
1474
LNS.Query subquery;
1433
1475
subquery = StringToQuery ("Text", part.Text, term_list);
1434
1476
if (subquery != null) {
1435
p_query.Add (subquery, false, false);
1477
p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1436
1478
added_subquery = true;
1439
1481
// FIXME: HotText is ignored for now!
1440
1482
// subquery = StringToQuery ("HotText", part.Text);
1441
1483
// if (subquery != null) {
1442
// p_query.Add (subquery, false, false);
1484
// p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1443
1485
// added_subquery = true;
1448
1490
LNS.Query subquery;
1449
1491
subquery = StringToQuery ("PropertyText", part.Text, term_list);
1450
1492
if (subquery != null) {
1451
p_query.Add (subquery, false, false);
1493
p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1452
1494
// Properties can live in either index
1453
1495
if (! only_build_primary_query)
1454
s_query.Add (subquery.Clone () as LNS.Query, false, false);
1496
s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
1455
1497
added_subquery = true;
1482
1524
if (term_list != null)
1483
1525
term_list.Add (term);
1484
1526
subquery = new LNS.TermQuery (term);
1485
p_query.Add (subquery, false, false);
1527
p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1486
1528
// Properties can live in either index
1487
1529
if (! only_build_primary_query)
1488
s_query.Add (subquery.Clone () as LNS.Query, false, false);
1530
s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
1490
1532
// Reset these so we return a null query
1491
1533
p_query = null;
1515
1557
// Search text content
1516
1558
term = new Term ("Text", query_string_lower);
1517
1559
subquery = new LNS.WildcardQuery (term);
1518
p_query.Add (subquery, false, false);
1560
p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1519
1561
term_list.Add (term);
1521
1563
// Search text properties
1522
1564
term = new Term ("PropertyText", query_string_lower);
1523
1565
subquery = new LNS.WildcardQuery (term);
1524
p_query.Add (subquery, false, false);
1566
p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1525
1567
// Properties can live in either index
1526
1568
if (! only_build_primary_query)
1527
s_query.Add (subquery.Clone () as LNS.Query, false, false);
1569
s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
1528
1570
term_list.Add (term);
1530
1572
// Search property keywords
1531
1573
term = new Term ("PropertyKeyword", query_string_lower);
1532
1574
term_list.Add (term);
1533
1575
subquery = new LNS.WildcardQuery (term);
1534
p_query.Add (subquery, false, false);
1576
p_query.Add (subquery, LNS.BooleanClause.Occur.SHOULD);
1535
1577
// Properties can live in either index
1536
1578
if (! only_build_primary_query)
1537
s_query.Add (subquery.Clone () as LNS.Query, false, false);
1579
s_query.Add (subquery.Clone () as LNS.Query, LNS.BooleanClause.Occur.SHOULD);
1539
1581
primary_query = p_query;
1540
1582
if (! only_build_primary_query)
1587
1629
term_list, query_part_hook,
1588
1630
out p_subq, out s_subq, out sub_hit_filter);
1589
1631
if (p_subq != null)
1590
p_query.Add (p_subq, false, false);
1632
p_query.Add (p_subq, LNS.BooleanClause.Occur.SHOULD);
1591
1633
if (s_subq != null)
1592
s_query.Add (s_subq, false, false);
1634
s_query.Add (s_subq, LNS.BooleanClause.Occur.SHOULD);
1593
1635
if (sub_hit_filter != null) {
1594
1636
if (or_hit_filter == null)
1595
1637
or_hit_filter = new OrHitFilter ();
1679
1721
int cursor = 0;
1680
1722
if (extra_requirement != null) {
1681
top_query.Add (extra_requirement, true, false);
1723
top_query.Add (extra_requirement, LNS.BooleanClause.Occur.MUST);
1690
1732
LNS.BooleanQuery bq;
1691
1733
bq = new LNS.BooleanQuery ();
1692
1734
bottom_queries.Add (bq);
1693
top_query.Add (bq, false, false);
1735
top_query.Add (bq, LNS.BooleanClause.Occur.SHOULD);