2
* Copyright (C) 2010 Michal Hruby <michal.mhr@gmail.com>
4
* This program is free software; you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation; either version 2 of the License, or
7
* (at your option) any later version.
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
* Authored by Michal Hruby <michal.mhr@gmail.com>
23
* This plugin keeps a cache of file names for directories that are commonly
29
// Plugin class deriving from Object and listing Activatable and ItemProvider.
// NOTE(review): brace-only lines and some statements are missing from this
// extract, so the class body below is not complete.
public class HybridSearchPlugin: Object, Activatable, ItemProvider
31
// Non-owning reference to the DataSink; can only be set at construction.
public unowned DataSink data_sink { get; construct; }
32
// Enabled flag; defaults to true.
public bool enabled { get; set; default = true; }
34
// NOTE(review): the bodies of activate () and deactivate () are not visible here.
public void activate ()
39
public void deactivate ()
44
// Match type handed to Utils.FileInfo (see process_directory_contents, which
// passes typeof (MatchObject)) for files found by this plugin.
private class MatchObject: Object, Match, UriMatch
46
// for Match interface
47
public string title { get; construct set; }
48
public string description { get; set; default = ""; }
49
public string icon_name { get; construct set; default = ""; }
50
public bool has_thumbnail { get; construct set; default = false; }
51
public string thumbnail_path { get; construct set; }
52
public MatchType match_type { get; construct set; }
55
// URI-related properties (presumably for the UriMatch interface — confirm).
public string uri { get; set; }
56
public QueryFlags file_type { get; set; }
57
public string mime_type { get; set; }
59
// Creates a GENERIC_URI match.
// thumbnail_path: optional thumbnail; has_thumbnail is true only when it is non-null.
// icon: optional icon name; null is stored as "".
public MatchObject (string? thumbnail_path, string? icon)
61
Object (match_type: MatchType.GENERIC_URI,
62
has_thumbnail: thumbnail_path != null,
63
icon_name: icon ?? "",
64
// a null thumbnail path is stored as the empty string
thumbnail_path: thumbnail_path ?? "");
68
// Cached listing of a single directory.
private class DirectoryInfo
71
// Time stamp written by process_directory_contents when the listing is refreshed.
public TimeVal last_update;
72
// Files of the directory; process_directory_contents keys each entry by the
// uri of the stored Utils.FileInfo (the key is an unowned string).
public Gee.Map<unowned string, Utils.FileInfo?> files;
74
// Creates an info object with an empty file map.
// NOTE(review): what is done with `path` is not visible in this extract.
public DirectoryInfo (string path)
76
this.files = new Gee.HashMap<unowned string, Utils.FileInfo?> ();
81
// Registers HybridSearchPlugin with the default DataSink plugin registry.
// NOTE(review): only some of the arguments of the register_plugin () call
// are visible in this extract.
static void register_plugin ()
83
DataSink.PluginRegistry.get_default ().register_plugin (
84
typeof (HybridSearchPlugin),
86
// translatable, user-visible description of the plugin
_ ("Improve results returned by the Zeitgeist plugin by looking " +
87
"for similar files on the filesystem."),
100
// NOTE(review): the header of the block containing the next three statements
// is not visible in this extract (presumably an initialization block).
// Creates the empty caches and starts the scan of recently used documents.
directory_hits = new Gee.HashMap<string, int> ();
101
// NOTE(review): the field is declared as Gee.Map<string, DirectoryInfo> and is
// used with DirectoryInfo values, but the map is created here with
// Utils.FileInfo? as value type — confirm which one is intended.
directory_contents = new Gee.HashMap<string, Utils.FileInfo?> ();
103
analyze_recent_documents ();
106
// Set to true at the end of analyze_recent_documents; search () waits for it.
private bool initialization_done = false;
108
// Hooks zg_plugin_search_done up to the data sink's search_done signal using
// the "SynapseZeitgeistPlugin" detail.
protected override void constructed ()
110
data_sink.search_done["SynapseZeitgeistPlugin"].connect (this.zg_plugin_search_done);
113
// File name of the recently-used bookmark file.
private const string RECENT_XML_NAME = "recently-used.xbel";
114
// Upper bound on the number of directories picked from the bookmark file.
private const int MAX_RECENT_DIRS = 10;
116
// Reads the recently-used bookmark file, counts how often each parent
// directory occurs among its URIs, and scans the most frequent directories
// (at most MAX_RECENT_DIRS) into the cache via process_directories ().
// Sets initialization_done to true at the end.
// NOTE(review): brace-only lines, the try statements and several local
// declarations (load_ok, contents, len, exists, z, the loop header's init
// part) are not visible in this extract.
private async void analyze_recent_documents ()
118
// first candidate: the hidden file "." + RECENT_XML_NAME in the home directory
var recent = File.new_for_path (Path.build_filename (
119
Environment.get_home_dir (), "." + RECENT_XML_NAME, null));
123
uint8[] file_contents;
131
load_ok = yield recent.load_contents_async (null,
132
out file_contents, null);
134
catch (GLib.Error load_error)
139
// try again in datadir
142
recent = File.new_for_path (Path.build_filename (
143
Environment.get_user_data_dir (), RECENT_XML_NAME, null));
144
load_ok = yield recent.load_contents_async (null,
145
out file_contents, null);
150
contents = (string) file_contents;
151
len = file_contents.length;
153
// load all uris from recently-used bookmark file
154
var bf = new BookmarkFile ();
155
bf.load_from_data (contents, len);
156
string[] uris = bf.get_uris ();
158
// make a <string, int> map of directory occurrences for the uris
159
Gee.Map<string, int> dir_hits = new Gee.HashMap<string, int> ();
161
foreach (unowned string uri in uris)
163
var f = File.new_for_uri (uri);
164
File? parent = f.get_parent ();
165
if (parent == null) continue;
166
string? parent_path = parent.get_path ();
167
if (parent_path == null) continue;
168
// presumably a key that is not in the map yet reads as 0 here — confirm
dir_hits[parent_path] = dir_hits[parent_path]+1;
171
// sort the map according to hits
172
Gee.List<Gee.Map.Entry<string, int>> sorted_dirs = new Gee.ArrayList<Gee.Map.Entry<string, int>> ();
173
sorted_dirs.add_all (dir_hits.entries);
174
sorted_dirs.sort ((a, b) =>
176
unowned Gee.Map.Entry<string, int> e1 =
177
(Gee.Map.Entry<string, int>) a;
178
unowned Gee.Map.Entry<string, int> e2 =
179
(Gee.Map.Entry<string, int>) b;
180
// descending order: entries with more hits come first
return e2.value - e1.value;
183
// pick first MAX_RECENT_DIRS items and scan those
184
Gee.List<string> directories = new Gee.ArrayList<string> ();
186
i<sorted_dirs.size && directories.size<MAX_RECENT_DIRS; i++)
188
string dir_path = sorted_dirs[i].key;
189
// skips every path that starts with "/tmp" (plain prefix test, so e.g.
// "/tmpfoo" is skipped as well)
if (dir_path.has_prefix ("/tmp")) continue;
190
var dir_f = File.new_for_path (dir_path);
191
if (dir_f.is_native ())
194
exists = yield Utils.query_exists_async (dir_f);
195
if (exists) directories.add (dir_path);
199
yield process_directories (directories);
202
// add up the number of cached file names for the log message below
foreach (var x in directory_contents.entries)
204
z += x.value.files.size;
206
Utils.Logger.log (this, "keeps in cache now %d file names", z);
211
// NOTE(review): presumably reached from an error handler that is not visible here
Utils.Logger.warning (this, "Unable to parse %s", recent.get_path ());
214
initialization_done = true;
217
// Emitted by zg_plugin_search_done with the result set and query id it received.
public signal void zeitgeist_search_complete (ResultSet? rs, uint query_id);
219
// Handler connected in constructed () to data_sink.search_done; forwards its
// arguments unchanged through zeitgeist_search_complete.
private void zg_plugin_search_done (ResultSet? rs, uint query_id)
221
zeitgeist_search_complete (rs, query_id);
224
// Hit counter per directory path; updated by process_uris and cleared by
// search () when the new query does not extend the previous one.
Gee.Map<string, int> directory_hits;
226
// Number of URIs collected for the current query (written in search ()).
int current_level_uris = 0;
228
// Collects the parent directories of those URIs that are regular files and
// raises each directory's counter in directory_hits by the length of the
// current query (1 when there is no current query).
// NOTE(review): the remaining query_info_async arguments and any error
// handling around the call are not visible in this extract.
private async void process_uris (Gee.Collection<string> uris)
230
Gee.Set<string> dirs = new Gee.HashSet<string> ();
232
foreach (var uri in uris)
234
var f = File.new_for_uri (uri);
239
var fi = yield f.query_info_async (FileAttribute.STANDARD_TYPE,
241
if (fi.get_file_type () == FileType.REGULAR)
243
// NOTE(review): get_parent () is dereferenced without a null check here,
// unlike in analyze_recent_documents — confirm it cannot be null.
string? parent_path = f.get_parent ().get_path ();
244
if (parent_path != null) dirs.add (parent_path);
254
// weight of one hit: the length of the current query string, or 1 if unset
int q_len = current_query == null ? 1 : (int) current_query.length;
255
foreach (var dir in dirs)
257
if (directory_hits.has_key (dir))
259
int hit_count = directory_hits[dir];
260
directory_hits[dir] = hit_count + q_len;
264
// directory seen for the first time (presumably the else branch)
directory_hits[dir] = q_len;
269
// Returns directory paths taken from directory_hits. When the map holds at
// most MAX_ITEMS entries all keys are used; otherwise the entries with the
// lowest hit count are dropped, the rest is sorted by hit count (descending)
// and at most MAX_ITEMS keys are added to the result.
// NOTE(review): MAX_ITEMS, the declaration of `count` and the return
// statement are not visible in this extract.
private Gee.List<string> get_most_likely_dirs ()
272
var result = new Gee.ArrayList<string> ();
274
if (directory_hits.size <= MAX_ITEMS)
276
// too few results, use all we have
277
foreach (var dir in directory_hits.keys) result.add (dir);
281
var sort_array = new Gee.ArrayList<Gee.Map.Entry<unowned string, int>> ();
282
// first pass: find the smallest hit count
int min_hit = int.MAX;
283
foreach (var entry in directory_hits.entries)
285
if (entry.value < min_hit) min_hit = entry.value;
287
// second pass: keep only entries strictly above the minimum
foreach (var entry in directory_hits.entries)
289
if (entry.value > min_hit) sort_array.add (entry);
291
sort_array.sort ((a, b) =>
293
unowned Gee.Map.Entry<unowned string, int> e1 =
294
(Gee.Map.Entry<unowned string, int>) a;
295
unowned Gee.Map.Entry<unowned string, int> e2 =
296
(Gee.Map.Entry<unowned string, int>) b;
297
// descending order by hit count
return e2.value - e1.value;
301
foreach (var entry in sort_array)
303
result.add (entry.key);
304
// stop once MAX_ITEMS keys were added (assuming count starts at 0 — confirm)
if (count++ >= MAX_ITEMS-1) break;
311
// Cache of directory listings, keyed by directory path.
Gee.Map<string, DirectoryInfo> directory_contents;
313
// Stamps `di` with TimeVal () and stores a Utils.FileInfo for every listed
// file, skipping names that end in .o, .lo, .mo or .gmo.
// NOTE(review): a `directory` parameter is used below and passed by
// update_directory_contents, but its declaration line is missing from this
// extract, as is the statement executed for ignored names.
private void process_directory_contents (DirectoryInfo di,
315
List<GLib.FileInfo> files)
317
// process_directories compares this stamp with a fresh TimeVal () to decide
// whether the listing is still fresh
di.last_update = TimeVal ();
318
foreach (var f in files)
320
unowned string name = f.get_name ();
321
// ignore common binary files
322
if (name.has_suffix (".o") || name.has_suffix (".lo") ||
323
name.has_suffix (".mo") || name.has_suffix (".gmo"))
327
var child = directory.get_child (name);
328
var file_info = new Utils.FileInfo (child.get_uri (), typeof (MatchObject));
329
// the entry is keyed by the uri held in the stored FileInfo
di.files[file_info.uri] = file_info;
333
// Enumerates the children of `directory` (requesting only the standard name
// attribute) and passes the listing to process_directory_contents.
// Declared as throwing Error.
private async void update_directory_contents (GLib.File directory,
334
DirectoryInfo di) throws Error
336
Utils.Logger.debug (this, "Scanning %s", directory.get_path ());
337
var enumerator = yield directory.enumerate_children_async (
338
FileAttribute.STANDARD_NAME, 0, 0);
339
// NOTE(review): a single request for up to 1024 entries is all that is
// visible here; larger directories may be listed only partially — confirm.
var files = yield enumerator.next_files_async (1024, 0);
342
process_directory_contents (di, directory, files);
345
// Makes sure a listing is cached for each of the given directory paths.
// A path that is already cached is checked against a 5 minute freshness
// window; a path that is not cached gets a new DirectoryInfo.
// NOTE(review): the declaration of `di`, the branch structure and any error
// handling around update_directory_contents are not visible in this extract.
private async void process_directories (Gee.Collection<string> directories)
347
foreach (var dir_path in directories)
349
var directory = File.new_for_path (dir_path);
353
if (directory_contents.has_key (dir_path))
355
var cur_time = TimeVal ();
356
di = directory_contents[dir_path];
357
// 5 * 60 seconds since the last update
if (cur_time.tv_sec - di.last_update.tv_sec <= 5 * 60)
359
// info fairly fresh, continue
365
di = new DirectoryInfo (dir_path);
366
directory_contents[dir_path] = di;
369
yield update_directory_contents (directory, di);
377
// Searches the cached directory listings for files matching the query and
// returns them in a new ResultSet.
// q: the query (query_string, query_type and max_results are read, and
//    check_cancellable () is called on it).
// original_rs: optional result set; URIs it already contains are skipped.
// dirs: directories to look at; when null all keys of directory_contents
//    are used.
// NOTE(review): brace-only lines, try/catch statements, the increment of
// num_results and the return statement are not visible in this extract.
private async ResultSet get_extra_results (Query q,
378
ResultSet? original_rs,
379
Gee.Collection<string>? dirs)
382
uint num_results = 0;
383
bool enough_results = false;
384
var results = new ResultSet ();
386
// FIXME: casefold the parse_names, so we don't need CASELESS regexes
387
// but first find out if it really saves some time
388
var flags = RegexCompileFlags.OPTIMIZE | RegexCompileFlags.CASELESS;
389
var matchers = Query.get_matchers_for_query (q.query_string,
390
MatcherFlags.NO_FUZZY | MatcherFlags.NO_PARTIAL,
392
Gee.Collection<string> directories = dirs ?? directory_contents.keys;
393
foreach (var directory in directories)
395
// NOTE(review): assumes every given directory is present in the cache; the
// only call visible in this extract passes null for dirs.
var di = directory_contents[directory];
396
// check if we have fresh directory listing
397
var dir = File.new_for_path (directory);
400
var dir_info = yield dir.query_info_async ("time::*", 0, 0, null);
402
// NOTE(review): two ways of reading the modification time follow; presumably
// they are alternatives selected by conditional compilation — confirm.
var t = dir_info.get_modification_time ();
405
dir_info.get_modification_time (out t);
407
if (t.tv_sec > di.last_update.tv_sec)
409
// the directory was changed, let's update
410
yield update_directory_contents (dir, di);
415
Utils.Logger.warning (this, "%s", err.message);
418
var rel_srv = RelevancyService.get_default ();
420
// only add the uri if it matches our query
421
foreach (var entry in di.files.entries)
423
foreach (var matcher in matchers)
425
Utils.FileInfo fi = entry.value;
426
if (matcher.key.match (fi.parse_name))
428
// skip files that are already part of the original result set
if (original_rs == null || !original_rs.contains_uri (fi.uri))
430
bool done_io = false;
431
if (!fi.is_initialized ())
433
yield fi.initialize ();
436
else if (fi.match_obj != null && fi.file_type in q.query_type)
438
// make sure the file still exists (could be deleted by now)
439
bool exists = yield fi.exists ();
443
// file info is now initialized
444
if (fi.match_obj != null && fi.file_type in q.query_type)
446
//Does match only the path, use base_relevancy like ZG plugin does for non-matched
447
int base_relevancy = Match.Score.POOR + Match.Score.INCREMENT_MINOR;
448
if (matcher.key.match (fi.match_obj.title))
450
//Matches title! Great news!
451
base_relevancy = matcher.value - Match.Score.URI_PENALTY;
453
float pop = rel_srv.get_uri_popularity (fi.uri);
454
results.add (fi.match_obj,
455
RelevancyService.compute_relevancy (base_relevancy, pop));
459
// the HashMap might have changed, if it did iterator.next ()
460
// will fail and we'll crash
461
// this here should prevent it, but it still needs more elegant fix
462
if (done_io) q.check_cancellable ();
467
// stop collecting once the query's result limit is reached
if (num_results >= q.max_results)
469
enough_results = true;
474
q.check_cancellable ();
475
if (enough_results) break;
478
if (directories.size == 0) q.check_cancellable ();
480
Utils.Logger.debug (this, "found %d extra uris (ZG returned %d)",
481
results.size, original_rs == null ? 0 : original_rs.size);
486
// Query string of the most recent search () call that got past the length check.
private string? current_query = null;
488
// Returns true when the query asks for at least one of the AUDIO, DOCUMENTS,
// IMAGES, UNCATEGORIZED or VIDEO categories.
public bool handles_query (Query query)
490
// we search everything but ACTIONS and APPLICATIONS
491
var our_results = QueryFlags.AUDIO | QueryFlags.DOCUMENTS
492
| QueryFlags.IMAGES | QueryFlags.UNCATEGORIZED | QueryFlags.VIDEO;
493
// FIXME: APPLICATIONS?
494
// keep only the requested categories that this plugin serves
var common_flags = query.query_type & our_results;
496
return common_flags != 0;
499
// True while a search () call is updating the caches; search () uses it as a
// simple lock.
public bool processing_query { get; private set; default = false; }
501
// Suspends the calling coroutine until processing_query is false.
// NOTE(review): the declaration of sig_id and the yield statement are not
// visible in this extract.
private async void wait_for_processing_finished ()
503
while (processing_query)
506
// get notified whenever the processing-query property changes
sig_id = this.notify["processing-query"].connect (() =>
508
// still busy: keep waiting
if (processing_query) return;
509
// resume this coroutine
wait_for_processing_finished.callback ();
513
SignalHandler.disconnect (this, sig_id);
517
// Runs a search for `q`: collects the URIs reported through
// zeitgeist_search_complete for this query id, updates the directory hit
// counters and cached listings, and then looks for matching files with
// get_extra_results ().
// Returns null for query strings of length 1 or less. Declared as throwing
// SearchError; q.check_cancellable () is called at several points.
// NOTE(review): brace-only lines, the yield statements, the declaration of
// hit_level, the return statement and any try/catch/finally are not visible
// in this extract.
public async ResultSet? search (Query q) throws SearchError
519
// ignore short searches
520
if (q.query_string.length <= 1) return null;
522
// FIXME: what about deleting one character?
523
// the new query does not extend the previous one: start counting from scratch
if (current_query != null && !q.query_string.has_prefix (current_query))
526
current_level_uris = 0;
527
directory_hits.clear ();
530
uint query_id = q.query_id;
531
current_query = q.query_string;
532
int last_level_uris = current_level_uris;
533
ResultSet? original_rs = null;
534
Gee.Set<string> uris = new Gee.HashSet<string> ();
536
// wait for our signal or cancellable
537
ulong sig_id = this.zeitgeist_search_complete.connect ((rs, q_id) =>
539
// ignore results that belong to a different query
if (q_id != query_id) return;
540
// let's mine directories ZG is aware of
541
// NOTE(review): the signal declares rs as nullable; no null check is
// visible before this loop — confirm.
foreach (var match in rs)
543
unowned UriMatch uri_match = match.key as UriMatch;
544
if (uri_match == null) continue;
545
uris.add (uri_match.uri);
550
ulong canc_sig_id = q.cancellable.connect (() =>
552
// who knows what thread this runs in
553
SignalHandler.block (this, sig_id); // is this thread-safe?
554
Idle.add (search.callback); // FIXME: this could cause issues
557
if (data_sink.is_plugin_enabled (Type.from_name ("SynapseZeitgeistPlugin")))
559
// wait for results from ZeitgeistPlugin
563
SignalHandler.disconnect (this, sig_id);
564
q.cancellable.disconnect (canc_sig_id);
566
q.check_cancellable ();
568
// make sure we've done the initial load
569
while (!initialization_done)
571
// schedule a resume of this coroutine after 250 ms
Timeout.add (250, search.callback);
573
q.check_cancellable ();
576
// we need a sort-of-a-lock here to prevent updating of the file caches
577
// by multiple queries at the same time
578
while (processing_query)
580
// FIXME: the while isn't really necessary, but let's be safe
581
yield wait_for_processing_finished ();
582
q.check_cancellable ();
584
// NOTE(review): check_cancellable () is called below while this flag is
// true; verify that the flag is also reset when the query is cancelled,
// otherwise later searches would keep waiting.
processing_query = true;
588
// process results from the zeitgeist plugin
589
current_level_uris = uris.size;
590
if (current_level_uris > 0)
592
// extracts directories from the uris and updates directory_hits
593
yield process_uris (uris);
594
q.check_cancellable ();
598
// we weren't cancelled and we should have some directories and hits
599
if (hit_level > 1 && q.query_string.length >= 3)
601
// we want [current_level_uris / last_level_uris > 0.66]
602
if (current_level_uris * 3 > 2 * last_level_uris)
604
var directories = get_most_likely_dirs ();
605
/*if (!directories.is_empty)
607
debug ("we're in level: %d and we'd crawl these dirs >\n%s",
608
hit_level, string.joinv ("; ", directories.to_array ()));
610
yield process_directories (directories);
611
q.check_cancellable ();
615
// directory contents are updated now, we can take a look if any
616
// files match our query
618
// FIXME: run this sooner, it doesn't need to wait for the signal
619
var result = yield get_extra_results (q, original_rs, null);
624
processing_query = false;