1
// FilesystemVolumeScanner.cs
3
// Copyright (C) 2008 - 2011 Patrick Ulbrich
5
// This program is free software: you can redistribute it and/or modify
6
// it under the terms of the GNU General Public License as published by
7
// the Free Software Foundation, either version 3 of the License, or
8
// (at your option) any later version.
10
// This program is distributed in the hope that it will be useful,
11
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
// GNU General Public License for more details.
15
// You should have received a copy of the GNU General Public License
16
// along with this program. If not, see <http://www.gnu.org/licenses/>.
19
//#define DEBUG_FILE_VERBOSE
24
using System.Threading;
25
using System.Security.Cryptography;
26
using System.Collections.Generic;
28
using Platform.Common;
29
using Platform.Common.IO;
30
using Platform.Common.Mime;
31
using Platform.Common.Diagnostics;
32
using VolumeDB.Searching;
33
using VolumeDB.Searching.ItemSearchCriteria;
34
using VolumeDB.Metadata;
36
namespace VolumeDB.VolumeScanner
38
// TODO : EnsureOpen() in public members?
39
// TODO : override Dispose(bool) to e.g dispose/set null m_sbPathfixer?
40
/// <summary>
/// Volume scanner for filesystem volumes: walks the directory tree below a
/// drive's root path and records its directories, files and symlinks.
/// </summary>
public sealed class FilesystemVolumeScanner
41
: AbstractVolumeScanner<FileSystemVolume, FilesystemVolumeInfo, FilesystemScannerOptions>
43
// separator used in the item locations produced by FixPath(), regardless of platform
private const char PATH_SEPARATOR = '/';
44
// presumably the mime type assigned to directory items - TODO confirm against its users
internal const string MIME_TYPE_DIRECTORY = "x-directory/normal";
46
private bool disposed;
47
//private MimeInfo mimeInfo;
48
// reusable buffer FixPath() uses to rewrite platform specific paths
private StringBuilder sbPathFixer;
50
// collects symlinks found during the scan; they are inserted after the regular items
private SymLinkHelper symLinkHelper;
51
// generates and saves thumbnails when Options.GenerateThumbnails is set
private ThumbnailGenerator thumbGen;
54
// do not allow to modify the constructor parameters
55
// (i.e. database, options)
56
// through public properties later, since the scanner
57
// may already use them after scanning has been started,
58
// and some stuff has been initialized depending on the
59
// options in the ctor already.
60
/// <summary>
/// Creates a scanner for the given drive and sets up the helpers used while scanning.
/// </summary>
/// <param name="drive">Drive to scan; forwarded to the base class.</param>
/// <param name="database">Forwarded to the base class.</param>
/// <param name="options">Scanner options; forwarded to the base class and read back through Options.</param>
/// <exception cref="ArgumentException">
/// Thrown with the message "Drive is not mounted", or when thumbnail generation
/// is requested while Options.DbDataPath is null or empty.
/// </exception>
public FilesystemVolumeScanner(Platform.Common.IO.DriveInfo drive,
61
VolumeDatabase database,
62
FilesystemScannerOptions options)
63
: base(drive, database, options)
67
throw new ArgumentException("Drive is not mounted", "drive");
69
// thumbnails are stored below DbDataPath, so it must be set when they are requested
if (Options.GenerateThumbnails && string.IsNullOrEmpty(Options.DbDataPath))
70
// NOTE(review): ArgumentException(string message, string paramName) expects the
// message first; the two arguments below look swapped - confirm and fix.
throw new ArgumentException("DbDataPath",
71
"Thumbnail generation requires the DbDataPath option to be set");
74
//this.mimeInfo = new MimeInfo(false);
75
this.sbPathFixer = new StringBuilder(1024);
76
// volume and thumbnail paths are not known yet; ScanningThreadMain() fills them in
this.paths = new Paths(Options.DbDataPath, null, null);
77
this.symLinkHelper = new SymLinkHelper(this);
78
this.thumbGen = new ThumbnailGenerator();
81
/// <summary>
/// Scans the drive: prepares the thumbnail directories (if thumbnails are enabled),
/// dumps the directory tree below the drive's root path, inserts the collected
/// symlink items and finally stores the file/directory/size totals on the volume.
/// If the scan fails, the volume data directory is deleted on a best-effort basis.
/// </summary>
/// <param name="drive">Drive whose RootPath is scanned.</param>
/// <param name="volume">Volume that receives the totals; its VolumeID names the data directory.</param>
/// <param name="writer">Writer handed on to RecursiveDump() and the symlink helper.</param>
/// <exception cref="DirectoryNotFoundException">Thrown if the root path does not exist.</exception>
internal override void ScanningThreadMain(Platform.Common.IO.DriveInfo drive,
82
FileSystemVolume volume,
83
BufferedVolumeItemWriter writer) {
85
if (Options.GenerateThumbnails) {
86
paths.volumeDataPath = DbData.CreateVolumeDataPath(paths.dbDataPath, volume.VolumeID);
87
paths.thumbnailPath = DbData.CreateVolumeDataThumbsPath(paths.volumeDataPath);
90
string rootPath = drive.RootPath;
91
// remove possible ending path separator except for _system_ root paths
92
rootPath = RemoveEndingSlash(rootPath);
93
// if ((rootPath.Length > 1) && (rootPath[rootPath.Length - 1] == Path.DirectorySeparatorChar))
94
// rootPath = rootPath.Substring(0, rootPath.Length - 1);
96
// make sure the root path exists
97
// (media may have been removed after scanner construction)
98
if (!Directory.Exists(rootPath))
99
throw new DirectoryNotFoundException("Root path does not exist");
101
DirectoryInfo dir = new DirectoryInfo(rootPath);
102
// ID_NONE as parent marks dir as the root directory of the volume
RecursiveDump(rootPath, dir, writer, VolumeDatabase.ID_NONE);
103
// symlinks are inserted last, after the regular items they may point to
symLinkHelper.InsertSymLinkItems(writer, volume.VolumeID);
105
volume.SetFileSystemVolumeFields(VolumeInfo.Files, VolumeInfo.Directories, VolumeInfo.Size);
106
} catch (Exception) {
109
// remove the data directory (incl. thumbnails) created for this volume
if((paths.volumeDataPath != null) && Directory.Exists(paths.volumeDataPath))
110
Directory.Delete(paths.volumeDataPath, true);
111
} catch (Exception) { /* just shut up */ }
113
// rethrow initial exception
118
/// <summary>
/// Resets scanner state; clears what the symlink helper has collected.
/// </summary>
protected override void Reset() {
120
//Media.SetFilesystemMediaFields(0, -1, 0); // -1 : subtract root dir
121
symLinkHelper.Clear();
126
/// <summary>
/// Drops the reference to the symlink helper and forwards to the base implementation.
/// </summary>
/// <param name="disposing">Passed through unchanged to base.Dispose().</param>
protected override void Dispose(bool disposing) {
135
symLinkHelper = null;
139
base.Dispose(disposing);
142
/// <summary>
/// Indexes one directory: inserts the directory item, then the files it contains,
/// then recurses into its subdirectories.
/// Symlinks are not followed; unless Options.DiscardSymLinks is set they are
/// registered with the symlink helper. Items that cannot be read or that are
/// neither regular files, directories nor symlinks are reported through
/// SendScannerWarning().
/// </summary>
private void RecursiveDump(string rootPath,
144
BufferedVolumeItemWriter writer,
147
CheckForCancellationRequest();
149
/* event is called before a _directory_ item is about to be scanned only.
150
* it could also be called everytime a _file_ item is about to be scanned,
151
* but this could result in a performance loss.
153
PostBeforeScanItem(dir.FullName); //OnBeforeScanItem(new BeforeScanItemEventArgs(dir.FullName));
155
// bool dirIsSymLink = false;
156
// string symLinkTarget = null;
159
// catch possible FileNotFoundExceptions
160
// (e.g. on filesystems with wrong filename encoding or vanishing virtual files in /dev).
162
ft = FileHelper.GetFileType(dir.FullName, false);
163
} catch (FileNotFoundException ex) {
164
/* may throw ScanCancelledException */
165
SendScannerWarning(string.Format(S._("Directory '{0}' not found. (Wrong filename encoding?)"),
170
bool dirIsSymLink = (ft == FileType.SymbolicLink);
172
// anything that is neither a directory nor a symlink is reported and not indexed
if ((ft != FileType.Directory) && !dirIsSymLink) {
173
/* may throw ScanCancelledException */
174
SendScannerWarning(string.Format(S._("Skipped item '{0}' as it doesn't seem to be a real directory."),
180
if (!Options.DiscardSymLinks) {
182
string symLinkTarget = null;
185
// get real path with all symlinks resolved
186
symLinkTarget = FileHelper
187
.GetCanonicalSymLinkTarget(dir.FullName);
188
} catch (FileNotFoundException) {}
191
// this check seems to be useless since a broken link
192
// to a directory is identified as a broken link to a _file_ (a few lines below).
193
if (symLinkTarget == null) {
194
/* may throw ScanCancelledException */
195
SendScannerWarning(string.Format(S._("Skipped symlink item '{0}' as the target does not exist."),
200
// skip symlinks outside of rootPath
201
// (in addition, GetLocation()/FixPath() need paths relative to rootPath)
202
// NOTE(review): StartsWith(string) is culture-sensitive and is a plain prefix test,
// so a root of "/mnt/cd" also matches a target below "/mnt/cd2" - consider an
// ordinal comparison that also checks for a separator after the prefix.
if (!symLinkTarget.StartsWith(rootPath)) {
203
/* may throw ScanCancelledException */
204
SendScannerWarning(string.Format(S._("Skipped symlink item '{0}' as it appears to point to a different drive ('{1}')."),
210
// true: the symlink points to a directory
symLinkHelper.AddSymLink(dir, symLinkTarget, rootPath, parentID, true);
212
/* do not dump symlinks to directories */
217
long dirID = InsertDir(rootPath, dir, writer, parentID);
219
// TODO : check m_cancel here (?)
221
// /* do not dump symlinks to directories */
226
/* insert files of dir */
227
FileInfo[] files = dir.GetFiles(); /* throws access exceptions (cant access _DIRECTORY_) */
228
for (int i = 0; i < files.Length; i++) {
229
CheckForCancellationRequest();
231
// bool isRegularFile = true;
232
// bool isSymLink = false;
234
//#if DEBUG && DEBUG_FILE_VERBOSE
235
if (Global.EnableDebugging) {
236
Debug.WriteLine(string.Format("Indexing file '{0}'", files[i].FullName));
239
// catch possible FileNotFoundExceptions
240
// (e.g. on filesystems with wrong filename encoding or vanishing virtual files in /dev).
242
ft = FileHelper.GetFileType(files[i].FullName, false);
243
} catch (FileNotFoundException ex) {
244
/* may throw ScanCancelledException */
245
SendScannerWarning(string.Format(S._("File '{0}' not found. (Wrong filename encoding?)"),
246
files[i].FullName), ex);
250
/* special files (fifos, blockdevices, chardevices) are skipped */
251
bool isRegularFile = (ft == FileType.RegularFile);
252
bool isSymLink = (ft == FileType.SymbolicLink);
256
// defaults that stay in place if the file cannot be opened below
string mimeType = null;
257
MetadataStore metaData = MetadataStore.Empty;
259
bool thumbGenerated = false;
261
FileStream fs = null;
263
// OpenRead() must be called _before_ MimeInfo.GetMimeType(),
264
// since this method returns a mimetype even if the file does not exist / can't be accessed.
265
fs = File.OpenRead(files[i].FullName); /* throws access/IO exceptions (cant access _FILE_) */
267
mimeType = MimeType.GetMimeTypeForFile(files[i].FullName);
269
// merge the metadata items of all configured providers into one store
if (Options.MetadataProviders != null) {
270
IEnumerable<MetadataItem> items = null;
272
foreach (MetadataProvider mdp in Options.MetadataProviders) {
273
IEnumerable<MetadataItem> tmp = mdp.GetMetadata(files[i].FullName, mimeType);
278
items = items.Concat(tmp);
282
metaData = new MetadataStore(items);
285
if (Options.ComputeHashs) {
286
// hashes the stream opened above
hash = ComputeHash(fs);
287
// TODO : check m_cancel here? hashing can be a lengthy operation on big files.
290
if (Options.GenerateThumbnails) {
291
thumbGenerated = thumbGen.GenerateThumbnail(files[i], mimeType);
294
} catch (Exception e) {
295
// ### exception caught: hash, mime and/or metadata may be null
296
// and the thumbnail may not have been generated!
297
if (e is UnauthorizedAccessException || e is IOException) {
298
/* may throw ScanCancelledException */
299
SendScannerWarning(string.Format(S._("Error opening file '{0}', can't retrieve any mime/metadata. ({1})"),
311
long fileID = InsertFile(rootPath, files[i], writer, parentID, mimeType, metaData, hash);
313
// the thumbnail file is named after the ID of the inserted file item
thumbGen.SaveThumbnail(Path.Combine(paths.thumbnailPath, string.Format("{0}.png", fileID)));
315
} else if (isSymLink) {
317
if (!Options.DiscardSymLinks) {
319
string symLinkTarget = null;
322
// get real path with all symlinks resolved
323
symLinkTarget = FileHelper
324
.GetCanonicalSymLinkTarget(files[i].FullName);
325
} catch (FileNotFoundException) {}
327
if (symLinkTarget == null) {
328
/* may throw ScanCancelledException */
329
SendScannerWarning(string.Format(S._("Skipped symlink item '{0}' as the target does not exist."),
332
// skip symlinks outside of rootPath
333
// (in addition, GetLocation()/FixPath() need paths relative to rootPath)
334
} else if (!symLinkTarget.StartsWith(rootPath)) {
335
/* may throw ScanCancelledException */
336
SendScannerWarning(string.Format(S._("Skipped symlink item '{0}' as it appears to point to a different drive ('{1}')."),
340
// skip symlinks pointing to special files (only regular files are indexed)
341
} else if (FileHelper.GetFileType(symLinkTarget, false) != FileType.RegularFile) {
342
/* may throw ScanCancelledException */
343
SendScannerWarning(string.Format(S._("Skipped symlink item '{0}' as it does not point to a regular file ('{1}')."),
347
// false: the symlink points to a file
symLinkHelper.AddSymLink(files[i], symLinkTarget, rootPath, parentID, false);
351
/* may throw ScanCancelledException */
352
SendScannerWarning(string.Format(S._("Skipped item '{0}' as it appears to be some kind of special file."),
356
// TODO : check m_cancel here (?)
360
/* recursively dump subdirs */
361
DirectoryInfo[] childDirs = dir.GetDirectories(); /* throws access exceptions (cant access _DIRECTORY_) */
362
for (int i = 0; i < childDirs.Length; i++)
363
RecursiveDump(rootPath, childDirs[i], writer, parentID);
365
} catch (UnauthorizedAccessException e) {
366
//ScannerWarningEventArgs args = new ScannerWarningEventArgs("Unable to dump dir '" + dir.FullName + "'. (" + e.Message + ")", e);
367
//OnScannerWarning(args); // may throw ScanCancelledException
369
/* may throw ScanCancelledException */
370
SendScannerWarning(string.Format(S._("Unable to dump dir '{0}'. ({1})"),
377
/// <summary>
/// Creates the volume item for a directory and counts it in VolumeInfo.directories.
/// The volume's root directory (parentID == ID_NONE) is named "/"; other
/// directories get their location from GetLocation().
/// Unless symlinks are discarded, the directory is also registered with the
/// symlink helper so that symlinks can later be resolved to its item ID.
/// </summary>
/// <returns>
/// VolumeDatabase.ID_NONE on the counters-only path; otherwise presumably the
/// ID of the new item (the final return is not part of this excerpt).
/// </returns>
private long InsertDir(string rootPath,
379
BufferedVolumeItemWriter writer,
381
/* if scanner has no db associated, just update the counters
385
// increase dircounter for symlink to dirs as well?
386
// nautilus refers to selected symlinks to dirs as dirs too.
387
Interlocked.Increment(ref VolumeInfo.directories);
388
return VolumeDatabase.ID_NONE;
394
/* if parentID is ID_NONE, the directory is the volumes root dir
395
* -> location = null, name = "/" (analog System.IO.DirectoryInfo)
397
if (parentID == VolumeDatabase.ID_NONE) {
399
name = PATH_SEPARATOR.ToString();
401
location = GetLocation(dir.FullName, rootPath);
405
DateTime lastWriteTime = GetLastWriteTime(dir);
407
DirectoryVolumeItem item = GetNewVolumeItem<DirectoryVolumeItem>(parentID,
411
VolumeItemType.DirectoryVolumeItem);
413
item.SetFileSystemVolumeItemFields(location, lastWriteTime, VolumeDatabase.ID_NONE);
414
//item.Name = name; // set the items name (defined on VolumeItem baseclass)
417
// /* don't dump symlink dirs directly into the database,
418
// * they're required to have a target item assigned.
419
// * target items are resolved in an additional step.
421
// symLinkItems.add(symLinkTarget, item);
427
// increase dircounter for symlink to dirs as well?
428
// nautilus refers to selected symlinks to dirs as dirs too.
429
Interlocked.Increment(ref VolumeInfo.directories);
431
// remember path -> item ID so symlinks pointing here can be resolved later
if (!Options.DiscardSymLinks)
432
symLinkHelper.AddFile(dir.FullName, item.ItemID);
437
/// <summary>
/// Creates the volume item for a file and adds it to the VolumeInfo counters
/// (file count and total size, using file.Length).
/// Unless symlinks are discarded, the file is also registered with the symlink
/// helper so that symlinks can later be resolved to its item ID.
/// </summary>
/// <returns>
/// VolumeDatabase.ID_NONE on the counters-only path; otherwise presumably the
/// ID of the new item (the final return is not part of this excerpt).
/// </returns>
private long InsertFile(string rootPath,
439
BufferedVolumeItemWriter writer,
442
MetadataStore metaData,
444
/* if scanner has no db associated, just update the counters
448
Interlocked.Increment(ref VolumeInfo.files);
449
Interlocked.Add(ref VolumeInfo.size, file.Length);
450
return VolumeDatabase.ID_NONE;
453
DateTime lastWriteTime = GetLastWriteTime(file);
455
FileVolumeItem item = GetNewVolumeItem<FileVolumeItem>(parentID,
459
VolumeItemType.FileVolumeItem);
461
item.SetFileSystemVolumeItemFields(GetLocation(file.FullName, rootPath),
463
VolumeDatabase.ID_NONE);
465
item.SetFileVolumeItemFields(file.Length, hash);
466
//item.Name = file.Name; // set the items name (defined on VolumeItem baseclass)
470
Interlocked.Increment(ref VolumeInfo.files);
471
Interlocked.Add(ref VolumeInfo.size, file.Length);
473
// remember path -> item ID so symlinks pointing here can be resolved later
if (!Options.DiscardSymLinks)
474
symLinkHelper.AddFile(file.FullName, item.ItemID);
479
/// <summary>
/// Reads the last write time of a file or directory.
/// If reading it raises an ArgumentOutOfRangeException, DateTime.MinValue is
/// used instead and a scanner warning is sent.
/// </summary>
/// <param name="f">Item whose LastWriteTime is read.</param>
/// <returns>The item's last write time, or DateTime.MinValue if it could not be read.</returns>
private DateTime GetLastWriteTime(FileSystemInfo f) {
480
DateTime lastWriteTime;
481
// TODO : LastWriteTime fails on folders burned on CD (both, .net and mono).
482
// If it doesn't anymore this function can be removed.
484
lastWriteTime = f.LastWriteTime;
485
} catch (ArgumentOutOfRangeException e) {
486
// fall back to a well-defined value instead of aborting the scan
lastWriteTime = DateTime.MinValue;
488
/* may throw ScanCancelledException */
489
SendScannerWarning(string.Format(S._("Can't read LastWriteTime from item '{0}' ({1})."),
493
return lastWriteTime;
496
// returns the location of a file/dir and fixes DirectorySeparatorChars
497
// NOTE: requires a path _relative_ to rootPath!
498
// The location is the item's parent directory (Path.GetDirectoryName), rewritten
// by FixPath() so that it is relative to rootPath and separated by PATH_SEPARATOR.
private string GetLocation(string fullName, string rootPath) {
499
// remove possible ending slash from dirs
500
fullName = RemoveEndingSlash(fullName);
501
// if ((fullName[fullName.Length - 1] == Path.DirectorySeparatorChar) && (fullName.Length > 1))
502
// fullName = fullName.Substring(0, fullName.Length - 1);
504
// check if the path is the rootPath
505
// (only the lengths are compared; this relies on fullName lying below rootPath)
if (fullName.Length == rootPath.Length)
508
string dirName = Path.GetDirectoryName(fullName);
509
return FixPath(dirName, rootPath);
512
// removes rootPath and fixes DirectorySeparatorChars
513
// NOTE: requires a path _relative_ to rootPath!
514
// The result starts with PATH_SEPARATOR and uses PATH_SEPARATOR throughout;
// the root path itself maps to a single PATH_SEPARATOR.
private string FixPath(string fullName, string rootPath) {
515
// TODO : test under win32 and linux
517
// remove possible ending slash from dirs
518
fullName = RemoveEndingSlash(fullName);
519
// if ((fullName[fullName.Length - 1] == Path.DirectorySeparatorChar) && (fullName.Length > 1))
520
// fullName = fullName.Substring(0, fullName.Length - 1);
522
// check if the path is the rootPath
523
// (only the lengths are compared; this relies on fullName lying below rootPath)
if (fullName.Length == rootPath.Length)
524
return PATH_SEPARATOR.ToString();
526
// true if rootPath is just the platform's separator (e.g. "/")
bool rootPathEqualsDirSeperator = (rootPath.Length == 1 && rootPath[0] == Path.DirectorySeparatorChar);
528
// if path is separated by our PATH_SEPARATOR...
529
if (Path.DirectorySeparatorChar == PATH_SEPARATOR) {
530
// ... just remove rootPath (if it doesn't equal the dir separator by accident)
531
if (!rootPathEqualsDirSeperator)
532
fullName = fullName.Substring(rootPath.Length);
534
System.Diagnostics.Debug.Assert(fullName[0] == PATH_SEPARATOR);
537
} else { // path is NOT seperated by our PATH_SEPERATOR...
538
// reset stringbuilder
539
sbPathFixer.Length = 0;
541
sbPathFixer.Append(fullName);
543
if (!rootPathEqualsDirSeperator) {
544
sbPathFixer.Remove(0, rootPath.Length);
545
// presumably compensates for root paths such as "D:\", whose removal also
// strips the leading separator - TODO confirm
sbPathFixer.Insert(0, PATH_SEPARATOR);
548
// replace platform dependent DirectorySeparatorChar by PATH_SEPARATOR
549
sbPathFixer.Replace(Path.DirectorySeparatorChar, PATH_SEPARATOR);
551
string s = sbPathFixer.ToString();
552
System.Diagnostics.Debug.Assert(s[0] == PATH_SEPARATOR);
553
System.Diagnostics.Debug.Assert(s.IndexOf(Path.DirectorySeparatorChar) == -1);
556
return sbPathFixer.ToString();
561
/// <summary>
/// Removes a single trailing directory separator from a path.
/// </summary>
/// <remarks>
/// System root paths ("/" on unix, "C:\", "D:\", ... on windows) are returned
/// unchanged. This is especially important on windows, as e.g. "D:" won't work
/// with DirectoryInfo.
/// </remarks>
/// <param name="path">Path to trim; null or empty paths are returned as they are.</param>
/// <returns>
/// The path without its trailing separator, or the unchanged path if there is
/// nothing to remove.
/// </returns>
private static string RemoveEndingSlash(string path) {
    // nothing to strip; also avoids indexing into an empty string below
    if (string.IsNullOrEmpty(path))
        return path;

    bool endsWithSeparator = (path[path.Length - 1] == Path.DirectorySeparatorChar);

    // GetPathRoot() is only consulted for paths that actually end with a separator
    if (endsWithSeparator && (path != Path.GetPathRoot(path)))
        return path.Substring(0, path.Length - 1);

    return path;
}
571
/// <summary>
/// Computes the MD5 hash of a stream.
/// </summary>
/// <param name="s">Stream to hash; it is read by the hash algorithm but not closed here.</param>
/// <returns>The hash as an uppercase hexadecimal string, two digits per byte.</returns>
private static string ComputeHash(Stream s) {
    byte[] hash;

    // the crypto provider is disposable; release it as soon as the hash is computed
    using (MD5CryptoServiceProvider md5 = new MD5CryptoServiceProvider()) {
        hash = md5.ComputeHash(s);
    }

    // two hex digits per hash byte
    StringBuilder sb = new StringBuilder(hash.Length * 2);
    foreach (byte b in hash)
        sb.Append(b.ToString("X2"));

    return sb.ToString();
}
582
#region SymLinkHelper class
583
/// <summary>
/// Bookkeeping for symlinks found during a scan. Symlinks are only recorded
/// while the tree is dumped; InsertSymLinkItems() later creates an item for each
/// of them, using the item that was inserted for the symlink's target path.
/// </summary>
private class SymLinkHelper
585
// scanner this helper belongs to (counters, warnings, database access)
private FilesystemVolumeScanner scanner;
586
// looked up by full target path in InsertSymLinkItems() to obtain the target's item ID
private Dictionary<string, long> files;
587
// symlinks recorded by AddSymLink(), processed by InsertSymLinkItems()
private List<SymLinkItem> symLinkItems;
589
public SymLinkHelper(FilesystemVolumeScanner scanner) {
590
this.scanner = scanner;
591
this.files = new Dictionary<string, long>();
592
this.symLinkItems = new List<SymLinkItem>();
595
// called by the scanner for every inserted file/dir with its full path and item ID
public void AddFile(string path, long id) {
599
// records a symlink together with the canonical path of its target
public void AddSymLink(FileSystemInfo symLink,
600
string fullTargetPath,
605
SymLinkItem s = new SymLinkItem();
606
s.parentID = parentID;
607
s.name = symLink.Name;
608
s.location = scanner.GetLocation(symLink.FullName, rootPath);
609
s.fullPath = symLink.FullName;
610
s.fullTargetPath = fullTargetPath;
616
public void Clear() {
618
symLinkItems.Clear();
621
// Creates an item for every recorded symlink. A symlink whose target path has no
// known item ID is reported through SendScannerWarning().
public void InsertSymLinkItems(BufferedVolumeItemWriter writer, long volumeID) {
622
if (symLinkItems.Count == 0)
625
/* if scanner has no db associated, just update the counters
627
if (!scanner.HasDB) {
628
foreach(SymLinkItem sli in symLinkItems) {
630
Interlocked.Increment(ref scanner.VolumeInfo.directories);
632
Interlocked.Increment(ref scanner.VolumeInfo.files);
635
// increase totalsize by size of symlinks too? (not size of target!)
636
// or are symlinks as big as dirs, those aren't respected as well..
637
//Interlocked.Add(ref VolumeInfo.size, sli.size);
642
// make sure all files/dirs have been written to the database
643
// before searching for symlink targets.
646
foreach (SymLinkItem sli in symLinkItems) {
648
scanner.CheckForCancellationRequest();
651
if (!files.TryGetValue(sli.fullTargetPath, out itemID)) {
652
/* may throw ScanCancelledException */
653
scanner.SendScannerWarning(string.Format(S._("Failed to resolve target item for symlink '{0}'."),
656
// search for the item with the resolved ID on the volume being scanned
SearchCriteriaGroup g = new SearchCriteriaGroup(MatchRule.AllMustMatch);
657
g.AddSearchCriteria(new IDSearchCriteria(volumeID, IDSearchField.VolumeID, CompareOperator.Equal));
658
g.AddSearchCriteria(new IDSearchCriteria(itemID, IDSearchField.ItemID, CompareOperator.Equal));
660
// query target item.
661
// async BeginItemSearch() won't work here
662
// (active transaction prevents other threads from accessing the database)
663
VolumeItem[] queriedItems = scanner.Database.SearchItem(g);
665
FileSystemVolumeItem targetItem = (FileSystemVolumeItem)queriedItems[0];
666
FileSystemVolumeItem newItem;
668
// the new symlink item gets the same item type as its target
if (targetItem is FileVolumeItem) {
669
newItem = scanner.GetNewVolumeItem<FileVolumeItem>(sli.parentID,
673
VolumeItemType.FileVolumeItem);
675
// a symlink to a file takes over size and hash of its target
((FileVolumeItem)newItem).SetFileVolumeItemFields( ((FileVolumeItem)targetItem).Size,
676
((FileVolumeItem)targetItem).Hash);
678
Interlocked.Increment(ref scanner.VolumeInfo.files);
680
} else { // DirectoryVolumeItem
681
newItem = scanner.GetNewVolumeItem<DirectoryVolumeItem>(sli.parentID,
685
VolumeItemType.DirectoryVolumeItem);
687
Interlocked.Increment(ref scanner.VolumeInfo.directories);
690
newItem.SetFileSystemVolumeItemFields(sli.location,
691
targetItem.LastWriteTime,
694
writer.Write(newItem);
697
// increase totalsize by size of symlinks too? (not size of target!)
698
// or are symlinks as big as dirs, those aren't respected as well..
699
//Interlocked.Add(ref VolumeInfo.size, sli.size);
700
if (Global.EnableDebugging) {
701
Debug.WriteLine("Successfully resolved and saved symlink item: {0}/{1} -> {2}/{3}",
702
(sli.location == PATH_SEPARATOR.ToString() ? "" : sli.location),
704
(targetItem.Location == PATH_SEPARATOR.ToString() ? "" : targetItem.Location),
705
(targetItem.Name == PATH_SEPARATOR.ToString() ? "" : targetItem.Name));
711
// Data recorded for one symlink by SymLinkHelper.AddSymLink().
private class SymLinkItem
713
// ID of the item the symlink is inserted under
public long parentID;
715
// location of the symlink as returned by GetLocation()
public string location;
716
// full path of the symlink itself
public string fullPath;
717
// full path of the symlink's target; used to look up the target's item ID
public string fullTargetPath;
725
// data path the scanner was configured with (Options.DbDataPath)
public string dbDataPath;
726
// data directory of the scanned volume; set by ScanningThreadMain() when thumbnails are generated
public string volumeDataPath;
727
// directory the thumbnails are saved to; set by ScanningThreadMain() when thumbnails are generated
public string thumbnailPath;
729
// stores the three paths as given; null is allowed for paths that are not known yet
public Paths(string dbDataPath, string volumeDataPath, string thumbnailPath) {
730
this.dbDataPath = dbDataPath;
731
this.volumeDataPath = volumeDataPath;
732
this.thumbnailPath = thumbnailPath;