1
/* This file is part of Strigi Desktop Search
3
* Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
4
* Copyright (C) 2007 Flavio Castelli <flavio.castelli@gmail.com>
6
* This library is free software; you can redistribute it and/or
7
* modify it under the terms of the GNU Library General Public
8
* License as published by the Free Software Foundation; either
9
* version 2 of the License, or (at your option) any later version.
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
* Library General Public License for more details.
16
* You should have received a copy of the GNU Library General Public License
17
* along with this library; see the file COPYING.LIB. If not, write to
18
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19
* Boston, MA 02110-1301, USA.
22
#include <strigi/filelister.h>
23
#include <strigi/strigiconfig.h>
24
#include <strigi/strigi_thread.h>
25
#include <strigi/analyzerconfiguration.h>
29
#include <sys/types.h>
33
#include "stgdirent.h" //dirent replacement (includes native if available)
41
// windows does not have symbolic links, so stat() is fine
42
#define strigi_lstat stat
44
#define strigi_lstat lstat
48
using namespace Strigi;
53
* @param path string containing path to check
54
* Removes the terminating separator character from path.
55
* Under Windows that char is '\', '/' under *nix
57
// Produces a cleaned-up form of a path. Only part of the body is present in
// this listing; the visible steps operate on a string named temp.
string fixPath (string path)
59
// Guard for an empty path (the branch taken is not part of this listing).
if ( path.c_str() == NULL || path.length() == 0 )
65
size_t l= temp.length();
66
// NOTE(review): this casts away the const of c_str(); presumably the loop
// below rewrites characters through t - confirm.
char* t = (char*)temp.c_str();
67
for (size_t i=0;i<l;i++){
71
// Lower-case the first character.
temp[0] = tolower(temp.at(0));
76
// Strip one trailing separator, if present.
if (temp[temp.length() - 1 ] == separator)
77
return temp.substr(0, temp.size() - 1);
83
// Private implementation state of FileLister (FileLister creates one instance
// in its constructor and forwards its calls to it).
class FileLister::Private {
86
// Taken and released by nextFile(string&, time_t&) below.
STRIGI_MUTEX_DEFINE(mutex);
90
// Array with nOpenDirs entries, allocated in the constructor.
string::size_type* len;
91
// Set to len + nOpenDirs in the constructor.
string::size_type* lenEnd;
92
// Dereferenced by nextFile() to get the offset in path at which entry names
// are appended.
string::size_type* curLen;
94
// Most recent entry returned by readdir().
struct dirent* subdir;
96
// Paths are inserted here by startListing() and nextFile().
set<string> listedDirs;
97
// Filter queried through indexFile()/indexDir(); nextFile() treats a null
// pointer as "accept everything".
const AnalyzerConfiguration* const config;
99
Private(const AnalyzerConfiguration* ic);
101
// Overload whose visible lines lock and unlock the mutex.
int nextFile(string& p, time_t& time) {
103
STRIGI_MUTEX_LOCK(&mutex);
109
STRIGI_MUTEX_UNLOCK(&mutex);
112
void startListing(const std::string&);
115
// Constructor: initializes the mutex and allocates the dirs and len arrays,
// each with nOpenDirs slots, recording their end pointers.
FileLister::Private::Private(
116
const AnalyzerConfiguration* ic) :
118
STRIGI_MUTEX_INIT(&mutex);
120
// NOTE(review): neither malloc result is checked for NULL in the visible lines.
dirs = (DIR**)malloc(sizeof(DIR*)*nOpenDirs);
121
dirsEnd = dirs + nOpenDirs;
122
len = (string::size_type*)malloc(sizeof(string::size_type)*nOpenDirs);
123
lenEnd = len + nOpenDirs;
127
// Starts a listing at dir: copies dir into path, tests for a trailing '/',
// opens the directory and records path in listedDirs.
FileLister::Private::startListing(const string& dir){
131
// NOTE(review): this local shadows the member array named len.
string::size_type len = dir.length();
133
// NOTE(review): no bounds check on path is visible before this copy - confirm
// that the buffer is sized for dir.
strcpy(path, dir.c_str());
135
// NOTE(review): len-1 wraps around when dir is empty, unless that case is
// rejected in lines not shown here.
if (path[len-1] != '/') {
140
DIR* d = opendir(path);
143
listedDirs.insert (path);
151
// Destructor: loops while curDir has not moved below dirs (loop body not part
// of this listing), then destroys the mutex.
FileLister::Private::~Private() {
152
while (curDir >= dirs) {
160
STRIGI_MUTEX_DESTROY(&mutex);
163
// Advances the directory walk: reads entries of the current directory, stats
// each one, and handles regular files and sub-directories that the
// configuration accepts.
FileLister::Private::nextFile() {
165
// Keep going while curDir has not moved below dirs.
while (curDir >= dirs) {
167
// l is the offset in path at which the entry name is written.
string::size_type l = *curLen;
168
subdir = readdir(dir);
170
// skip the directories '.' and '..'
171
char c1 = subdir->d_name[0];
173
char c2 = subdir->d_name[1];
174
if (c2 == '.' || c2 == '\0') {
175
subdir = readdir(dir);
179
// Append the entry name to path; sl is the length of path including that name.
strcpy(path + l, subdir->d_name);
180
string::size_type sl = l + strlen(subdir->d_name);
181
// Only entries for which strigi_lstat succeeds are examined.
if (strigi_lstat(path, &dirstat) == 0) {
182
if (S_ISREG(dirstat.st_mode)) {
183
// The filter receives the full path and the bare entry name; a null config
// accepts every file.
if (config == 0 || config->indexFile(path, path+l)) {
184
mtime = dirstat.st_mtime;
187
// NOTE(review): the directory test is a bit mask (st_mode & S_IFDIR), while
// DirLister::Private::nextDir uses S_ISDIR. A mask test can also match other
// file types whose type bits include this bit - consider S_ISDIR here.
} else if (dirstat.st_mode & S_IFDIR && (config == 0
188
|| config->indexDir(path, path+l))) {
189
mtime = dirstat.st_mtime;
190
// Terminate the directory path with '/' before opening it.
strcpy(this->path+sl, "/");
191
DIR* d = opendir(path);
197
listedDirs.insert ( path);
201
subdir = readdir(dir);
209
// Creates the private implementation object, passing the configuration
// pointer through to it.
FileLister::FileLister(const AnalyzerConfiguration* ic)
210
: p(new Private(ic)) {
212
// Destructor. NOTE(review): presumably releases p, which the constructor
// allocates with new - confirm.
FileLister::~FileLister() {
216
// Forwards to Private::startListing() with the same directory.
FileLister::startListing(const string& dir) {
217
p->startListing(dir);
220
// Forwards to Private::nextFile(string&, time_t&), the overload that takes the
// mutex, and returns its result.
FileLister::nextFile(std::string& path, time_t& time) {
221
return p->nextFile(path, time);
224
// Overload taking a const char*& path; it calls the parameterless
// Private::nextFile() directly.
// NOTE(review): no mutex lock is visible on this path - confirm whether that
// is intended.
FileLister::nextFile(const char*& path, time_t& time) {
225
int r = p->nextFile();
233
// Steps through the listing until lastToSkip has been reached.
FileLister::skipTillAfter(const std::string& lastToSkip) {
234
int r = p->nextFile();
235
// Continue while results are non-negative and the current path differs from
// lastToSkip.
while (r >= 0 && p->path != lastToSkip) {
240
// Private implementation state of DirLister.
class DirLister::Private {
242
// Held around accesses to todoPaths in DirLister and in nextDir().
STRIGI_MUTEX_DEFINE(mutex);
243
// Directories still to be listed: entries are pushed at the back and taken
// from the front.
list<string> todoPaths;
244
// Filter queried through indexFile(); nextDir() treats a null pointer as
// "accept everything".
const AnalyzerConfiguration* const config;
246
Private(const AnalyzerConfiguration* ic) :config(ic) {}
247
// Takes the next directory from todoPaths, stores it in path and appends
// accepted entries together with their stat data to dirs.
int nextDir(std::string& path,
248
std::vector<std::pair<std::string, struct stat> >& dirs);
251
// Creates the private implementation object and initializes its mutex.
DirLister::DirLister(const AnalyzerConfiguration* ic)
252
: p(new Private(ic)) {
253
STRIGI_MUTEX_INIT(&p->mutex);
255
// Destructor: destroys the mutex owned by the private implementation object.
DirLister::~DirLister() {
256
STRIGI_MUTEX_DESTROY(&p->mutex);
260
// Queues dir for listing by appending it to todoPaths under the mutex.
DirLister::startListing(const string& dir) {
261
STRIGI_MUTEX_LOCK(&p->mutex);
262
p->todoPaths.push_back(dir);
263
STRIGI_MUTEX_UNLOCK(&p->mutex);
266
// Discards all queued directories by clearing todoPaths under the mutex.
DirLister::stopListing() {
267
STRIGI_MUTEX_LOCK(&p->mutex);
268
p->todoPaths.clear();
269
STRIGI_MUTEX_UNLOCK(&p->mutex);
272
// Takes the next queued directory, stores its name in path and reads its
// entries: accepted sub-directories are queued in todoPaths, and accepted
// entries are appended to dirs together with their stat data.
DirLister::Private::nextDir(std::string& path,
273
std::vector<std::pair<std::string, struct stat> >& dirs) {
276
size_t entrypathlength;
277
// check if there are more directories to work on
278
// open the directory
279
STRIGI_MUTEX_LOCK(&mutex);
280
if (todoPaths.empty()) {
281
STRIGI_MUTEX_UNLOCK(&mutex);
284
// Take the directory at the front of the queue.
path.assign(todoPaths.front());
285
todoPaths.pop_front();
286
// Only unlock if the todo list is not empty.
287
// If the list is empty, other threads must wait for this thread to populate
289
bool mutexLocked = true;
290
if (!todoPaths.empty()) {
291
STRIGI_MUTEX_UNLOCK(&mutex);
294
// entrypath holds path plus '/'; entrypathlength is the length of that prefix,
// so each entry name can be appended after resizing back to it.
entrypathlength = path.length()+1;
295
entrypath.assign(path);
296
entrypath.append("/");
300
dir = opendir(path.c_str());
302
// special case for root directory '/' on unix systems
308
STRIGI_MUTEX_UNLOCK(&mutex);
310
// if permission is denied, this is not an error
311
return (e == EACCES) ?0 :-1;
313
struct dirent* entry = readdir(dir);
314
struct stat entrystat;
316
entryname.assign(entry->d_name);
317
// Skip the '.' and '..' entries.
if (entryname != "." && entryname != "..") {
318
// Rebuild entrypath as the directory prefix followed by the entry name.
entrypath.resize(entrypathlength);
319
entrypath.append(entryname);
320
// Only entries for which strigi_lstat succeeds are examined.
if (strigi_lstat(entrypath.c_str(), &entrystat) == 0) {
321
if (S_ISDIR(entrystat.st_mode)) {
324
entrypath.c_str(), entryname.c_str())) {
326
// Queue the sub-directory under the mutex.
STRIGI_MUTEX_LOCK(&mutex);
328
todoPaths.push_back(entrypath);
329
STRIGI_MUTEX_UNLOCK(&mutex);
331
// NOTE(review): make_pair with explicit template arguments does not compile
// for lvalue arguments under C++11 and later; make_pair(entrypath, entrystat)
// without template arguments would.
dirs.push_back(make_pair<string,struct stat>(
332
entrypath, entrystat));
334
// Entries that are not directories go through the file filter; a null config
// accepts every entry.
} else if (config == 0 || config->indexFile(entrypath.c_str(),
335
entryname.c_str())) {
337
make_pair<string,struct stat>(entrypath, entrystat));
341
entry = readdir(dir);
345
STRIGI_MUTEX_UNLOCK(&mutex);
350
// Forwards to Private::nextDir() and returns its result.
DirLister::nextDir(std::string& path,
351
std::vector<std::pair<std::string, struct stat> >& dirs) {
352
return p->nextDir(path, dirs);
355
// Calls nextDir() repeatedly until it returns a negative value or the
// directory it reports equals lastToSkip.
DirLister::skipTillAfter(const std::string& lastToSkip) {
357
// NOTE(review): dirs is not cleared between iterations in the visible lines,
// so entries accumulate while skipping - confirm this is intended.
vector<pair<string, struct stat> > dirs;
358
while (nextDir(path, dirs) >= 0 && path != lastToSkip) {}