~ubuntu-branches/ubuntu/jaunty/beagle/jaunty-security

« back to all changes in this revision

Viewing changes to beagled/SnippetFu.cs

  • Committer: Bazaar Package Importer
  • Author(s): Stefan Ebner
  • Date: 2008-05-04 00:31:32 UTC
  • mfrom: (1.1.21 upstream)
  • Revision ID: james.westby@ubuntu.com-20080504003132-2tkm5o8moo5952ri
Tags: 0.3.7-2ubuntu1
 * Merge from Debian unstable. (LP: #225746) Remaining Ubuntu changes:
  - debian/control:
    + Rename ice{weasel,dove}-beagle to {mozilla,thunderbird}-beagle and
      update the dependencies accordingly.
    + Change Maintainer to Ubuntu Mono Team.
  - debian/rules:
    + Install the mozilla-beagle and thunderbird-beagle extensions.
  - ice{dove,weasel}.dirs:
    + Renamed to {mozilla,thunderbird}-beagle.dirs.
    + Fixed paths to point to usr/lib/{firefox,thunderbird}

Show diffs side-by-side

added added

removed removed

Lines of Context:
38
38
        
39
39
        public class SnippetFu {
40
40
 
41
 
                static public SnippetReader GetSnippet (string[] query_terms, TextReader line_reader, bool full_text)
 
41
                static public SnippetReader GetSnippet (string[] query_terms, TextReader line_reader, bool full_text, int ctx_length, int snp_length)
42
42
                {
43
43
                        // FIXME: If the query doesn't have search text (or is null), we should
44
44
                        // generate a 'summary snippet'.
46
46
                        if (line_reader == null)
47
47
                                return null;
48
48
 
49
 
                        SnippetReader snippet_reader = new SnippetReader (line_reader, query_terms, full_text);
 
49
                        SnippetReader snippet_reader = new SnippetReader (line_reader, query_terms, full_text, ctx_length, snp_length);
50
50
                        return snippet_reader;
51
51
                }
52
52
                
53
 
                static public SnippetReader GetSnippetFromFile (string[] query_terms, string filename, bool full_text)
 
53
                static public SnippetReader GetSnippetFromFile (string[] query_terms, string filename, bool full_text, int ctx_length, int snp_length)
54
54
                {
55
55
                        FileStream stream = new FileStream (filename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
56
56
 
57
 
                        return GetSnippet (query_terms, new StreamReader (stream), full_text);
 
57
                        return GetSnippet (query_terms, new StreamReader (stream), full_text, ctx_length, snp_length);
58
58
                }
59
59
 
60
 
                static public SnippetReader GetSnippetFromTextCache (string[] query_terms, string filename, bool full_text)
 
60
                static public SnippetReader GetSnippetFromTextCache (string[] query_terms, string filename, bool full_text, int ctx_length, int snp_length)
61
61
                {
62
62
                        TextReader reader = TextCache.UserCache.GetReader (new Uri (filename));
63
63
                        if (reader == null)
64
64
                                return null;
65
65
                        try {
66
 
                                return GetSnippet (query_terms, reader, full_text);
 
66
                                return GetSnippet (query_terms, reader, full_text, ctx_length, snp_length);
67
67
                        } catch (ICSharpCode.SharpZipLib.SharpZipBaseException ex) {
68
68
                                Log.Debug ("Unexpected exception '{0}' while extracting snippet for {1}", ex.Message, filename);
69
69
                                return null;
127
127
                // Keep a sliding window of the starting positions of words
128
128
                SlidingWindow sliding_window;
129
129
 
130
 
                const int between_snippet_words = 6;
131
 
                const int soft_snippet_limit = 200;
132
 
 
133
 
                public SnippetReader (TextReader line_reader, string[] query_terms, bool full_text)
 
130
                private const int context_length_default = 6;
 
131
                private const int snippet_length_default = 200;
 
132
 
 
133
                private int context_length;
 
134
                private int snippet_length;
 
135
 
 
136
                public SnippetReader (TextReader line_reader, string[] query_terms, bool full_text, int context_length, int snippet_length)
134
137
                {
135
138
                        this.line_reader = line_reader;
136
139
                        this.found_snippet_length = 0;
137
140
                        this.full_text = full_text;
 
141
                        this.context_length = (context_length > 0 ? context_length : context_length_default);
 
142
                        this.snippet_length = (snippet_length > 0 ? snippet_length : snippet_length_default);
138
143
 
139
144
                        if (query_terms == null)
140
145
                                return;
141
146
 
142
 
                        this.sliding_window = new SlidingWindow (between_snippet_words);
 
147
                        this.sliding_window = new SlidingWindow (this.context_length);
143
148
 
144
149
                        // remove stop words from query_terms
145
150
                        query_terms_list = new ArrayList (query_terms.Length);
182
187
                        SnippetLine snippet_line;
183
188
                        ulong line = 0;
184
189
 
185
 
                        while (found_snippet_length < soft_snippet_limit) {
 
190
                        while (found_snippet_length < snippet_length) {
186
191
                                //Console.WriteLine ("Continue with last line ? {0}", continue_line);
187
192
                                if (! continue_line) {
188
193
                                        try {
273
278
                                        // We cache the token, so as to avoid stemming it more than once
274
279
                                        // when considering multiple terms.
275
280
                                        if (stemmed_token == null) {
276
 
                                                stemmed_token = LuceneCommon.Stem (token);
 
281
                                                stemmed_token = LuceneCommon.Stem (token.ToLower ());
277
282
                                        }
278
283
 
279
284
                                        if (String.Compare ((string) stemmed_terms [i], stemmed_token, true) != 0)
307
312
                                        // Add the start pos of the token to the window
308
313
                                        sliding_window.Add (pos);
309
314
                                        // If we found a match previously and saw enough following words, stop
310
 
                                        if (snippet_line != null && snippet_line.Count > 0 && sliding_window.Count == between_snippet_words) {
 
315
                                        if (snippet_line != null && snippet_line.Count > 0 && sliding_window.Count == context_length) {
311
316
                                                sliding_window.Reset ();
312
317
                                                string after_match = text.Substring (prev_match_end_pos, end_pos - prev_match_end_pos);
313
318
                                                snippet_line.AddNonMatchFragment (after_match);