39
39
public class SnippetFu {
41
static public SnippetReader GetSnippet (string[] query_terms, TextReader line_reader, bool full_text)
41
static public SnippetReader GetSnippet (string[] query_terms, TextReader line_reader, bool full_text, int ctx_length, int snp_length)
43
43
// FIXME: If the query doesn't have search text (or is null), we should
44
44
// generate a 'summary snippet'.
46
46
if (line_reader == null)
49
SnippetReader snippet_reader = new SnippetReader (line_reader, query_terms, full_text);
49
SnippetReader snippet_reader = new SnippetReader (line_reader, query_terms, full_text, ctx_length, snp_length);
50
50
return snippet_reader;
53
static public SnippetReader GetSnippetFromFile (string[] query_terms, string filename, bool full_text)
53
static public SnippetReader GetSnippetFromFile (string[] query_terms, string filename, bool full_text, int ctx_length, int snp_length)
55
55
FileStream stream = new FileStream (filename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
57
return GetSnippet (query_terms, new StreamReader (stream), full_text);
57
return GetSnippet (query_terms, new StreamReader (stream), full_text, ctx_length, snp_length);
60
static public SnippetReader GetSnippetFromTextCache (string[] query_terms, string filename, bool full_text)
60
static public SnippetReader GetSnippetFromTextCache (string[] query_terms, string filename, bool full_text, int ctx_length, int snp_length)
62
62
TextReader reader = TextCache.UserCache.GetReader (new Uri (filename));
63
63
if (reader == null)
66
return GetSnippet (query_terms, reader, full_text);
66
return GetSnippet (query_terms, reader, full_text, ctx_length, snp_length);
67
67
} catch (ICSharpCode.SharpZipLib.SharpZipBaseException ex) {
68
68
Log.Debug ("Unexpected exception '{0}' while extracting snippet for {1}", ex.Message, filename);
127
127
// Keep a sliding window of the starting positions of words
128
128
SlidingWindow sliding_window;
130
const int between_snippet_words = 6;
131
const int soft_snippet_limit = 200;
133
public SnippetReader (TextReader line_reader, string[] query_terms, bool full_text)
130
private const int context_length_default = 6;
131
private const int snippet_length_default = 200;
133
private int context_length;
134
private int snippet_length;
136
public SnippetReader (TextReader line_reader, string[] query_terms, bool full_text, int context_length, int snippet_length)
135
138
this.line_reader = line_reader;
136
139
this.found_snippet_length = 0;
137
140
this.full_text = full_text;
141
this.context_length = (context_length > 0 ? context_length : context_length_default);
142
this.snippet_length = (snippet_length > 0 ? snippet_length : snippet_length_default);
139
144
if (query_terms == null)
142
this.sliding_window = new SlidingWindow (between_snippet_words);
147
this.sliding_window = new SlidingWindow (this.context_length);
144
149
// remove stop words from query_terms
145
150
query_terms_list = new ArrayList (query_terms.Length);
182
187
SnippetLine snippet_line;
185
while (found_snippet_length < soft_snippet_limit) {
190
while (found_snippet_length < snippet_length) {
186
191
//Console.WriteLine ("Continue with last line ? {0}", continue_line);
187
192
if (! continue_line) {
273
278
// We cache the token, so as to avoid stemming it more than once
274
279
// when considering multiple terms.
275
280
if (stemmed_token == null) {
276
stemmed_token = LuceneCommon.Stem (token);
281
stemmed_token = LuceneCommon.Stem (token.ToLower ());
279
284
if (String.Compare ((string) stemmed_terms [i], stemmed_token, true) != 0)
307
312
// Add the start pos of the token to the window
308
313
sliding_window.Add (pos);
309
314
// If we found a match previously and saw enough following words, stop
310
if (snippet_line != null && snippet_line.Count > 0 && sliding_window.Count == between_snippet_words) {
315
if (snippet_line != null && snippet_line.Count > 0 && sliding_window.Count == context_length) {
311
316
sliding_window.Reset ();
312
317
string after_match = text.Substring (prev_match_end_pos, end_pos - prev_match_end_pos);
313
318
snippet_line.AddNonMatchFragment (after_match);