3
This module was written by Aureliano Rama and Simone Pagan Griso,
4
and modified by Douglas Thain. Igor Sfiligoi and Donatella Lucchesi
5
contributed significantly to the design and debugging of this system.
7
This software is distributed under the GNU General Public License.
8
See the file COPYING for details.
14
The Global Read-Only Web (GROW) filesystem is designed to make a
15
directory tree stored on a web server accessible over the wide area,
16
with aggressive caching and end-to-end integrity checks.
18
To create a GROW filesystem, run make_growfs on the root of the
19
filesystem, and export it via a web server. This script creates
20
a file .growfsdir that contains a complete directory listing and
21
checksum of all data. Upon first accessing the filesystem remotely,
22
GROW-FS loads the directory listing into a tree form in memory.
23
All metadata requests and directory lookups are handled using this
26
To access a file, GROW issues an HTTP request and reads the data
27
sequentially into the pfs_file_cache. A checksum is computed
28
incrementally. If the checksum does not match that in the directory
29
listing, the directory cache is discarded, and the close() fails
30
with EAGAIN, causing the pfs_file_cache to re-issue the open.
31
This procedure is repeated with an exponentially increasing backoff
32
until the filesystem becomes consistent.
34
The integrity of the directory listing is ensured by fetching
35
its checksum using https (NOTE: the code below currently builds a plain http:// URL). If the master checksum
36
and the directory listing are inconsistent, they are reloaded
37
in the same way as files.
39
This scheme is designed to maximize the cacheability of all components
40
of the filesystem. Both files and data can be cached on local disk
41
without remote consistency checks, as well as cached on shared proxy
42
servers, allowing the filesystem to scale to a very large number of clients.
44
(Note that GROW is designed to be an improvement over the old HTTP-FS
45
filesystem, which placed a listing in every single directory.)
49
#include "pfs_service.h"
53
#include "stringtools.h"
54
#include "domain_name.h"
56
#include "file_cache.h"
58
#include "http_query.h"
59
#include "hash_table.h"
63
#include "sleeptools.h"
74
#include <sys/statfs.h>
76
#define GROW_LINE_MAX 4096
78
#define GROW_EPOCH 1199163600
80
extern int pfs_master_timeout;
81
extern int pfs_checksum_files;
82
extern char pfs_temp_dir[];
83
extern struct file_cache * pfs_file_cache;
85
extern void pfs_abort();
86
extern int pfs_cache_invalidate( pfs_name *name );
88
static struct grow_filesystem * grow_filesystem_list = 0;
89
static sha1_context_t grow_filesystem_checksum;
92
A grow_filesystem structure represents an entire
93
filesystem rooted at a given host and path.
94
All known filesystems are kept in a linked list
95
rooted at grow_filesystem_list
98
struct grow_filesystem {
99
char hostport[PFS_PATH_MAX];
100
char path[PFS_PATH_MAX];
101
struct grow_dirent *root;
102
struct grow_filesystem *next;
106
A grow_dirent is a node in a tree representing the
107
entire directory structure of a grow_filesystem.
108
Each node describes its name, metadata, checksum,
109
and children (if a directory)
119
char checksum[SHA1_DIGEST_ASCII_LENGTH];
120
struct grow_dirent *children;
121
struct grow_dirent *parent;
122
struct grow_dirent *next;
125
void grow_dirent_delete( struct grow_dirent *d );
128
Compare two path strings only up to the first slash.
129
For example, "foo" matches "foo/bar/baz".
130
Return one if they match, zero otherwise.
133
static int compare_path_element( const char *a, const char *b )
137
if(*a=='/') return 1;
142
if(*a==0 && *b=='/') return 1;
143
if(*b==0 && *a=='/') return 1;
149
Compare two entire path strings to see if a is a prefix of b.
150
Return the remainder of b not matched by a.
151
For example, compare_path_prefix("foo/baz","foo/baz/bar") returns "/bar".
152
Return null if a is not a prefix of b.
155
static const char * compare_path_prefix( const char *a, const char *b )
158
if(*a=='/' && *b=='/') {
177
Recursively create a grow directory structure by reading
178
descriptor lines from a stored file.
182
struct grow_dirent * grow_dirent_create_from_file( FILE *file, struct grow_dirent *parent )
184
struct grow_dirent *d;
185
struct grow_dirent *list=0;
186
char line[GROW_LINE_MAX];
187
char name[GROW_LINE_MAX];
188
char linkname[GROW_LINE_MAX];
190
static INT64_T inode=2;
192
while(fgets(line,sizeof(line),file)) {
193
sha1_update(&grow_filesystem_checksum,(unsigned char*)line,strlen(line));
194
sha1_update(&grow_filesystem_checksum,(unsigned char*)"\n",1);
196
if(line[0]=='E') break;
198
d = (struct grow_dirent *) xxmalloc(sizeof(*d));
202
/* old large file format */
203
int fields = sscanf(line,"%c %[^\t]\t%d %*d %lld %*d %*d %ld %*d %s %[^\n]",
213
/* new more compact file format */
214
fields = sscanf(line,"%c %[^\t]\t%u %llu %ld %s %[^\n]",
223
d->mtime += GROW_EPOCH;
229
d->name = xstrdup(name);
231
d->linkname = xstrdup(linkname);
236
d->children = grow_dirent_create_from_file(file,d);
244
debug(D_GROW,"directory listing is corrupted!");
246
grow_dirent_delete(list);
255
Recursively destroy a directory structure.
258
void grow_dirent_delete( struct grow_dirent *d )
260
struct grow_dirent *n;
263
if(d->name) free(d->name);
264
if(d->linkname) free(d->linkname);
265
grow_dirent_delete(d->children);
272
void grow_dirent_to_stat( struct grow_dirent *d, struct pfs_stat *s )
275
s->st_ino = d->inode;
276
s->st_mode = d->mode;
281
s->st_size = d->size;
282
s->st_blksize = 65536;
283
s->st_blocks = 1+d->size/512;
284
s->st_atime = d->mtime;
285
s->st_mtime = d->mtime;
286
s->st_ctime = d->mtime;
290
Recursively search for the grow_dirent named by path
291
in the filesystem given by root. If link_count is zero,
292
then do not traverse symbolic links. Otherwise, when
293
link_count reaches 100, ELOOP is returned.
297
struct grow_dirent * grow_dirent_lookup_recursive( const char *path, struct grow_dirent *root, int link_count )
299
struct grow_dirent *d;
301
if(!path) path = "\0";
302
while(*path=='/') path++;
304
if( S_ISLNK(root->mode) && ( link_count>0 || path[0] ) ) {
310
char *linkname = root->linkname;
312
if(linkname[0]=='/') {
313
while(root->parent) {
320
root = grow_dirent_lookup_recursive(linkname,root,link_count+1);
327
if(!*path) return root;
329
if(!S_ISDIR(root->mode)) {
334
const char *subpath = strchr(path,'/');
335
if(!subpath) subpath = "\0";
337
if(compare_path_element(".",path)) {
338
return grow_dirent_lookup_recursive(subpath,root,link_count);
341
if(compare_path_element("..",path)) {
343
return grow_dirent_lookup_recursive(subpath,root->parent,link_count);
350
for(d=root->children;d;d=d->next) {
351
if(compare_path_element(d->name,path)) {
352
return grow_dirent_lookup_recursive(subpath,d,link_count);
361
Search for a grow filesystem rooted at the given host and path.
362
If the required files (.growfsdir and .growfschecksum) exist, then
363
create a grow filesystem struct and return it. If the two
364
are not consistent, delay and loop until they are.
365
Otherwise, return zero.
368
struct grow_filesystem * grow_filesystem_create( const char *hostport, const char *path )
370
unsigned char digest[SHA1_DIGEST_LENGTH];
371
unsigned char checksum[SHA1_DIGEST_ASCII_LENGTH];
372
char line[GROW_LINE_MAX];
373
char url[GROW_LINE_MAX];
374
char filename[GROW_LINE_MAX];
375
char txn[GROW_LINE_MAX];
376
struct grow_filesystem *f;
377
struct grow_dirent *d;
381
time_t stoptime = time(0)+pfs_master_timeout;
385
sprintf(url,"http://%s%s/.growfschecksum",hostport,path);
387
debug(D_GROW,"searching for filesystem at %s",url);
389
debug(D_GROW,"fetching checksum: %s",url);
391
link = http_query_no_cache(url,"GET",stoptime);
393
if(link_readline(link,line,sizeof(line),stoptime)) {
394
if(sscanf(line,"%s",checksum)) {
397
debug(D_GROW,"checksum is malformed!");
401
debug(D_GROW,"lost connection while fetching checksum!");
408
debug(D_GROW,"remote checksum is %s",checksum);
410
sprintf(url,"http://%s%s/.growfsdir",hostport,path);
412
if(file_cache_contains(pfs_file_cache,url,filename)!=0) {
414
debug(D_GROW,"fetching directory: %s",url);
416
int fd = file_cache_begin(pfs_file_cache,url,txn);
419
struct link *link = http_query_size(url,"GET",&size,stoptime,1);
421
if(link_stream_to_fd(link,fd,size,stoptime)>=0) {
422
file_cache_commit(pfs_file_cache,url,txn);
424
file_cache_abort(pfs_file_cache,url,txn);
428
file_cache_abort(pfs_file_cache,url,txn);
433
debug(D_GROW,"directory is already cached");
436
if(file_cache_contains(pfs_file_cache,url,filename)!=0) {
440
debug(D_GROW,"checksumming %s",filename);
442
if(!sha1_file(filename,digest)) {
443
debug(D_GROW,"couldn't checksum %s: %s",filename,strerror(errno));
447
debug(D_GROW,"local checksum: %s",sha1_string(digest));
449
if(strcmp((char*)checksum,sha1_string(digest))) {
450
debug(D_GROW,"checksum does not match, reloading...");
451
file_cache_delete(pfs_file_cache,url);
455
file = fopen(filename,"r");
457
debug(D_GROW,"couldn't open %s: %s",filename,strerror(errno));
461
d = grow_dirent_create_from_file(file,0);
463
debug(D_GROW,"%s is corrupted",filename);
465
file_cache_delete(pfs_file_cache,url);
471
/* Allocate with xxmalloc, as grow_dirent_create_from_file does, because the result is dereferenced immediately with no NULL check (presumably xxmalloc aborts on failure rather than returning NULL -- confirm). */
f = (struct grow_filesystem *) xxmalloc(sizeof(*f));
472
strcpy(f->hostport,hostport);
473
strcpy(f->path,path);
480
if(sleep_time<pfs_master_timeout) {
482
debug(D_GROW,"directory and checksum are inconsistent, retry in %d seconds",sleep_time);
483
sleep_for(sleep_time);
488
fatal("directory and checksum still inconsistent after %d seconds",pfs_master_timeout);
494
Recursively destroy a grow filesystem.
497
void grow_filesystem_delete( struct grow_filesystem *f )
500
grow_dirent_delete(f->root);
501
grow_filesystem_delete(f->next);
506
Destroy all internal state for all filesystems.
507
This is called whenever a file checksum is found
508
to be inconsistent, and the state must be reloaded.
511
void grow_filesystem_flush_all()
513
grow_filesystem_delete(grow_filesystem_list);
514
grow_filesystem_list = 0;
518
Given a full PFS path name, search for an already-loaded
519
filesystem record. If it exists, then search it for the
520
appropriate dirent. If no filesystem record is found,
521
then search for and load the needed filesystem.
524
struct grow_dirent * grow_dirent_lookup( pfs_name *name, int follow_links )
526
struct grow_filesystem *f;
527
char path[PFS_PATH_MAX];
531
for(f=grow_filesystem_list;f;f=f->next) {
532
if(!strcmp(f->hostport,name->hostport)) {
533
subpath = compare_path_prefix(f->path,name->rest);
535
subpath = compare_path_prefix(name->rest,f->path);
543
return grow_dirent_lookup_recursive(subpath,f->root,follow_links);
547
strcpy(path,name->rest);
549
f = grow_filesystem_create(name->hostport,path);
551
f->next = grow_filesystem_list;
552
grow_filesystem_list = f;
553
subpath = compare_path_prefix(f->path,name->rest);
554
return grow_dirent_lookup_recursive(subpath,f->root,follow_links);
556
s = strrchr(path,'/');
568
class pfs_file_grow : public pfs_file
573
sha1_context_t context;
576
pfs_file_grow( pfs_name *n, struct link *l, struct grow_dirent *d ) : pfs_file(n) {
578
grow_dirent_to_stat(d,&info);
579
if(pfs_checksum_files) {
584
virtual int close() {
587
struct grow_dirent *d;
588
d = grow_dirent_lookup(&name,1);
591
/* name is a pfs_name object, not a string: pass its path member to %s, as the checksum-failure message in this method does. */
debug(D_GROW,"%s is no longer valid, will reload...",name.path);
592
grow_filesystem_flush_all();
595
} else if(!strcmp(d->checksum,"0")) {
597
} else if(pfs_checksum_files) {
598
unsigned char digest[SHA1_DIGEST_LENGTH];
599
sha1_final(digest,&context);
600
if(!strcmp(sha1_string(digest),d->checksum)) {
603
debug(D_GROW,"checksum failed on %s, will reload...",name.path);
604
grow_filesystem_flush_all();
613
virtual pfs_ssize_t read( void *d, pfs_size_t length, pfs_off_t offset ) {
615
actual = link_read(link,(char*)d,length,LINK_FOREVER);
616
if(pfs_checksum_files && actual>0) sha1_update(&context,(unsigned char *)d,actual);
620
virtual int fstat( struct pfs_stat *i ) {
626
This is a compatibility hack.
627
This filesystem is read only, so locks make no sense.
628
This simply satisfies some programs that insist upon it.
630
virtual int flock( int op ) {
634
virtual pfs_ssize_t get_size() {
640
class pfs_service_grow : public pfs_service {
642
virtual int get_default_port() {
646
virtual pfs_file * open( pfs_name *name, int flags, mode_t mode ) {
647
struct grow_dirent *d;
648
char url[PFS_PATH_MAX];
650
d = grow_dirent_lookup(name,1);
653
if(S_ISDIR(d->mode)) {
658
sprintf(url,"http://%s%s",name->hostport,name->rest);
660
struct link *link = http_query_no_cache(url,"GET",time(0)+pfs_master_timeout);
662
debug(D_GROW,"open %s",url);
663
return new pfs_file_grow(name,link,d);
665
debug(D_GROW,"failed to open %s",url);
670
pfs_dir * getdir( pfs_name *name ) {
671
struct grow_dirent *d;
673
d = grow_dirent_lookup(name,1);
676
if(!S_ISDIR(d->mode)) {
681
pfs_dir *dir = new pfs_dir(name);
684
if(d->parent) dir->append("..");
686
for(d=d->children;d;d=d->next) {
687
dir->append(d->name);
693
virtual int lstat( pfs_name *name, struct pfs_stat *info ) {
694
struct grow_dirent *d;
696
d = grow_dirent_lookup(name,0);
699
grow_dirent_to_stat(d,info);
704
virtual int stat( pfs_name *name, struct pfs_stat *info ) {
705
struct grow_dirent *d;
707
d = grow_dirent_lookup(name,1);
710
grow_dirent_to_stat(d,info);
715
virtual int unlink( pfs_name *name ) {
720
virtual int access( pfs_name *name, mode_t mode ) {
721
struct pfs_stat info;
722
if(this->stat(name,&info)==0) {
734
virtual int chmod( pfs_name *name, mode_t mode ) {
739
virtual int chown( pfs_name *name, uid_t uid, gid_t gid ) {
744
virtual int lchown( pfs_name *name, uid_t uid, gid_t gid ) {
749
virtual int truncate( pfs_name *name, pfs_off_t length ) {
754
virtual int utime( pfs_name *name, struct utimbuf *buf ) {
759
virtual int rename( pfs_name *oldname, pfs_name *newname ) {
764
virtual int chdir( pfs_name *name, char *newpath ) {
765
struct pfs_stat info;
766
if(this->stat(name,&info)==0) {
767
if(S_ISDIR(info.st_mode)) {
778
virtual int link( pfs_name *oldname, pfs_name *newname ) {
783
virtual int symlink( const char *linkname, pfs_name *newname ) {
788
virtual int readlink( pfs_name *name, char *buf, pfs_size_t bufsiz ) {
789
struct grow_dirent *d;
791
d = grow_dirent_lookup(name,0);
794
if(S_ISLNK(d->mode)) {
796
strncpy(buf,d->linkname,bufsiz);
797
length = MIN((unsigned)bufsiz,strlen(d->linkname));
806
virtual int mkdir( pfs_name *name, mode_t mode ) {
811
virtual int rmdir( pfs_name *name ) {
817
static pfs_service_grow pfs_service_grow_instance;
818
pfs_service *pfs_service_grow = &pfs_service_grow_instance;