2
* Copyright (C) 2005-2009 Junjiro R. Okajima
4
* This program, aufs is free software; you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation; either version 2 of the License, or
7
* (at your option) any later version.
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
* lookup and dentry operations
23
#include <linux/namei.h>
26
/*
 * Prepare @h_nd, the nameidata that is handed to the lower fs.
 * Visible effects: LOOKUP_OPEN and LOOKUP_CREATE are cleared from
 * h_nd->flags, the open intent file pointer is reset to NULL, and there is
 * a path that zero-fills the whole structure.
 * NOTE(review): several lines of this function are missing from this copy,
 * including whatever selects between the flag-clearing path and the memset
 * path, and how @nd is consumed -- confirm against the complete file.
 */
static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
32
* gave up supporting LOOKUP_CREATE/OPEN for lower fs,
33
* due to whiteout and branch permission.
35
h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
38
h_nd->intent.open.file = NULL;
40
/* start from an all-zero nameidata */
memset(h_nd, 0, sizeof(*h_nd));
43
/*
 * Arguments marshalled for au_call_lkup_one(): that function reads the
 * name, h_parent, br and nd members and writes the lookup result through
 * errp.
 * NOTE(review): only the h_parent member declaration is visible in this copy.
 */
struct au_lkup_one_args {
46
struct dentry *h_parent;
51
/*
 * Look up @name under the lower directory @h_parent on branch @br.
 * When au_test_fs_null_nd() is true for the lower superblock the lookup is
 * done by vfsub_lookup_one_len() and its result is returned directly.
 * On the other path a local nameidata is pointed at @h_parent and the
 * branch mount, __lookup_one_len() fills h_nd.last, and
 * vfsub_lookup_hash() performs the lookup.
 * NOTE(review): the error check after __lookup_one_len(), the use of @nd
 * and the final return are not visible in this copy.
 */
struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
52
struct au_branch *br, struct nameidata *nd)
54
struct dentry *h_dentry;
56
struct nameidata h_nd;
58
if (au_test_fs_null_nd(h_parent->d_sb))
59
return vfsub_lookup_one_len(name->name, h_parent, name->len);
62
/* the lower lookup is rooted at the lower parent on the branch mount */
h_nd.path.dentry = h_parent;
63
h_nd.path.mnt = br->br_mnt;
65
err = __lookup_one_len(name->name, &h_nd.last, NULL, name->len);
66
/* preset the result to the encoded error value */
h_dentry = ERR_PTR(err);
69
h_dentry = vfsub_lookup_hash(&h_nd);
76
/*
 * Callback taking an opaque pointer: @args is a struct au_lkup_one_args.
 * Runs au_lkup_one() with the bundled arguments and stores the returned
 * dentry pointer through a->errp.
 * au_sio_lkup_one() hands this function to au_wkq_wait().
 */
static void au_call_lkup_one(void *args)
78
struct au_lkup_one_args *a = args;
79
*a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd);
82
/*
 * Flag bits for au_do_lookup_args.flags and their accessors.
 * AuLkup_ALLOW_NEG is set by au_lkup_dentry() and read by au_do_lookup().
 *
 * au_ftest_lkup() yields the masked bit (non-zero when set).
 * au_fset_lkup()/au_fclr_lkup() are statement macros; they are wrapped in
 * do { } while (0) so that "if (c) au_fset_lkup(f, X); else ..." parses as
 * a single statement instead of breaking on the trailing semicolon.
 */
#define AuLkup_ALLOW_NEG	1
#define au_ftest_lkup(flags, name)	((flags) & AuLkup_##name)
#define au_fset_lkup(flags, name) \
	do { (flags) |= AuLkup_##name; } while (0)
#define au_fclr_lkup(flags, name) \
	do { (flags) &= ~AuLkup_##name; } while (0)
87
/*
 * Lookup state shared between au_lkup_dentry() and au_do_lookup().
 * Members referenced in this file: flags (AuLkup_* bits), type and nd.
 * NOTE(review): the member declarations are not visible in this copy.
 */
struct au_do_lookup_args {
94
* returns positive/negative dentry, NULL or an error.
95
* NULL means whiteout-ed or not-found.
98
/*
 * Look up @dentry on branch @bindex under the lower directory @h_parent.
 *
 * Visible flow:
 * - au_wh_test() is consulted for the whiteout name @wh_name; when a
 *   whiteout is found, au_set_dbwh() records the branch and NULL is
 *   returned.
 * - au_lkup_one() does the real lookup with args->nd.  A type check against
 *   args->type (when it is non-zero) is part of a condition whose first
 *   half is not visible here.
 * - dbstart/dbend of @dentry are widened to include @bindex and the lower
 *   dentry is stored with au_set_h_dptr().
 * - when the lower inode is a directory, the branch is whiteout-able and
 *   the aufs inode (if any) is a directory too, au_diropq_test() runs under
 *   the lower inode i_mutex; au_set_dbdiropq() records the branch, and a
 *   negative result drops the stored lower dentry and becomes the
 *   ERR_PTR() value.
 *
 * NOTE(review): several lines (declarations, gotos, labels, the return)
 * are missing from this copy.
 */
au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
99
aufs_bindex_t bindex, struct qstr *wh_name,
100
struct au_do_lookup_args *args)
102
struct dentry *h_dentry;
103
struct inode *h_inode, *inode;
105
struct au_branch *br;
107
unsigned char wh_able;
108
const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
110
name = &dentry->d_name;
112
br = au_sbr(dentry->d_sb, bindex);
113
/* normalize the branch permission test to 0/1 */
wh_able = !!au_br_whable(br->br_perm);
115
wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
116
h_dentry = ERR_PTR(wh_found);
119
if (unlikely(wh_found < 0))
122
/* We found a whiteout */
123
/* au_set_dbend(dentry, bindex); */
124
au_set_dbwh(dentry, bindex);
126
return NULL; /* success */
129
h_dentry = au_lkup_one(name, h_parent, br, args->nd);
130
if (IS_ERR(h_dentry))
133
h_inode = h_dentry->d_inode;
138
|| (args->type && args->type != (h_inode->i_mode & S_IFMT)))
141
/* widen the [dbstart, dbend] range so that it covers this branch */
if (au_dbend(dentry) <= bindex)
142
au_set_dbend(dentry, bindex);
143
if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
144
au_set_dbstart(dentry, bindex);
145
au_set_h_dptr(dentry, bindex, h_dentry);
147
inode = dentry->d_inode;
148
/* the opaque test below applies only to directories on a whiteout-able branch */
if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
149
|| (inode && !S_ISDIR(inode->i_mode)))
150
goto out; /* success */
152
mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
153
opq = au_diropq_test(h_dentry, br);
154
mutex_unlock(&h_inode->i_mutex);
156
au_set_dbdiropq(dentry, bindex);
157
else if (unlikely(opq < 0)) {
158
/* undo the store above and report the error */
au_set_h_dptr(dentry, bindex, NULL);
159
h_dentry = ERR_PTR(opq);
170
/*
 * Check @name against the whiteout prefix.
 * The visible condition is true when the SHWH mount option is not set on
 * @sb and @name begins with AUFS_WH_PFX (compared over AUFS_WH_PFX_LEN
 * bytes).
 * NOTE(review): the statements that produce the return value are not
 * visible in this copy; au_lkup_dentry() stores the result in err.
 */
static int au_test_shwh(struct super_block *sb, const struct qstr *name)
172
if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
173
&& !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
179
* returns the number of lower positive dentries,
180
* otherwise an error.
181
* can be called at unlinking with @type set to zero.
183
/*
 * Look up the lower dentries of @dentry on the branches from @bstart up to
 * au_dbtaildir() of the parent.
 *
 * Visible flow:
 * - the name is checked by au_test_shwh() and a whiteout name is built
 *   with au_wh_name_alloc().
 * - AuLkup_ALLOW_NEG is set before the loop and cleared after a lookup
 *   that did not return an error pointer.
 * - per branch: the lower parent must have a directory inode; its i_mutex
 *   is held (AuLsc_I_PARENT) around au_do_lookup().
 * - after a lookup, args.type is taken from the file-type bits of the
 *   lower inode, and the whiteout (au_dbwh) and opaque-dir (au_dbdiropq)
 *   markers are consulted.
 * - after the loop au_update_dbstart() runs, and a negative dbstart while
 *   the UDBA_NONE option is not set is handled as "both of real entry and
 *   whiteout found".
 *
 * NOTE(review): the actions taken by most of the tests (continue, break,
 * goto), the cleanup and the return are not visible in this copy.
 */
int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
184
struct nameidata *nd)
187
aufs_bindex_t bindex, btail, bdiropq;
190
struct au_do_lookup_args args = {
195
const struct qstr *name = &dentry->d_name;
196
struct dentry *parent;
199
parent = dget_parent(dentry);
200
err = au_test_shwh(dentry->d_sb, name);
204
err = au_wh_name_alloc(&whname, name);
208
inode = dentry->d_inode;
209
isdir = !!(inode && S_ISDIR(inode->i_mode));
211
au_fset_lkup(args.flags, ALLOW_NEG);
214
btail = au_dbtaildir(parent);
215
for (bindex = bstart; bindex <= btail; bindex++) {
216
struct dentry *h_parent, *h_dentry;
217
struct inode *h_inode, *h_dir;
219
/* a lower dentry may already be stored for this branch */
h_dentry = au_h_dptr(dentry, bindex);
221
if (h_dentry->d_inode)
227
h_parent = au_h_dptr(parent, bindex);
230
h_dir = h_parent->d_inode;
231
if (!h_dir || !S_ISDIR(h_dir->i_mode))
234
mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
235
h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
237
mutex_unlock(&h_dir->i_mutex);
238
err = PTR_ERR(h_dentry);
239
if (IS_ERR(h_dentry))
241
au_fclr_lkup(args.flags, ALLOW_NEG);
243
if (au_dbwh(dentry) >= 0)
247
h_inode = h_dentry->d_inode;
252
/* later branches are looked up with the type found here */
args.type = h_inode->i_mode & S_IFMT;
253
if (args.type != S_IFDIR)
256
/* the type of lower may be different */
257
bdiropq = au_dbdiropq(dentry);
258
if (bdiropq >= 0 && bdiropq <= bindex)
265
au_update_dbstart(dentry);
268
if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
269
&& au_dbstart(dentry) < 0))
270
/* both of real entry and whiteout found */
280
/*
 * Look up @name under the lower directory @parent on branch @br.
 * When au_test_h_perm_sio() returns zero for MAY_EXEC on the parent inode,
 * au_lkup_one() is called directly with a NULL nameidata.  On the other
 * path the arguments are bundled into a struct au_lkup_one_args and the
 * lookup is run through au_wkq_wait() with au_call_lkup_one(); a non-zero
 * au_wkq_wait() result is converted with ERR_PTR().
 * NOTE(review): the initializer of args and the return statement are not
 * visible in this copy.
 */
struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
281
struct au_branch *br)
283
struct dentry *dentry;
286
if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
287
dentry = au_lkup_one(name, parent, br, /*nd*/NULL);
289
struct au_lkup_one_args args = {
297
wkq_err = au_wkq_wait(au_call_lkup_one, &args);
298
/* a failure of the wait itself replaces the lookup result */
if (unlikely(wkq_err))
299
dentry = ERR_PTR(wkq_err);
306
* lookup @dentry on @bindex which should be negative.
308
/*
 * Look up the lower dentry of @dentry on branch @bindex through
 * au_sio_lkup_one(), using the parent lower dentry on the same branch.
 * A lower dentry that already has an inode is reported with AuIOErr().
 * Otherwise dbstart/dbend of @dentry are widened to cover @bindex and the
 * lower dentry is stored with au_set_h_dptr().
 * NOTE(review): the rest of the error branch, the release of the reference
 * taken by dget_parent() and the return are not visible in this copy.
 */
int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex)
311
struct dentry *parent, *h_parent, *h_dentry;
314
name = &dentry->d_name;
315
parent = dget_parent(dentry);
316
h_parent = au_h_dptr(parent, bindex);
317
h_dentry = au_sio_lkup_one(name, h_parent,
318
au_sbr(dentry->d_sb, bindex));
319
err = PTR_ERR(h_dentry);
320
if (IS_ERR(h_dentry))
322
if (unlikely(h_dentry->d_inode)) {
324
AuIOErr("b%d %.*s should be negative.\n",
325
bindex, AuDLNPair(h_dentry));
330
/* extend the branch range of the dentry in both directions as needed */
if (bindex < au_dbstart(dentry))
331
au_set_dbstart(dentry, bindex);
332
if (au_dbend(dentry) < bindex)
333
au_set_dbend(dentry, bindex);
334
au_set_h_dptr(dentry, bindex, h_dentry);
342
/* ---------------------------------------------------------------------- */
344
/* subset of struct inode */
347
/* unsigned int i_nlink; */
358
/*
 * Snapshot selected attributes of the lower inode @h_inode into @ia:
 * inode number, uid, gid, i_version, size, block count and the file-type
 * bits of i_mode.  The i_nlink copy is commented out.
 * au_iattr_test() compares against the same set of fields.
 */
static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
360
ia->i_ino = h_inode->i_ino;
361
/* ia->i_nlink = h_inode->i_nlink; */
362
ia->i_uid = h_inode->i_uid;
363
ia->i_gid = h_inode->i_gid;
364
ia->i_version = h_inode->i_version;
366
ia->i_size = h_inode->i_size;
367
ia->i_blocks = h_inode->i_blocks;
369
/* only the file type is kept, permission bits are masked off */
ia->i_mode = (h_inode->i_mode & S_IFMT);
372
/*
 * Compare the snapshot @ia with the current attributes of @h_inode.
 * Returns non-zero when any of the visible fields (ino, uid, gid,
 * i_version, size, blocks, file type) differs, zero when all of them
 * match.  The i_nlink comparison is commented out.
 */
static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
374
return ia->i_ino != h_inode->i_ino
375
/* || ia->i_nlink != h_inode->i_nlink */
376
|| ia->i_uid != h_inode->i_uid
377
|| ia->i_gid != h_inode->i_gid
378
|| ia->i_version != h_inode->i_version
380
|| ia->i_size != h_inode->i_size
381
|| ia->i_blocks != h_inode->i_blocks
383
|| ia->i_mode != (h_inode->i_mode & S_IFMT);
386
/*
 * Verify that the lower dentry @h_dentry is still what a fresh lookup
 * under @h_parent on branch @br returns.
 * The attribute snapshot is first filled with 0xff bytes and then, on one
 * path, loaded from the lower inode by au_iattr_save(); nfs and fuse are
 * singled out on the alternative path because of how their d_revalidate
 * treats negative dentries (see the comments below).
 * The name is looked up again with au_lkup_one(); if the dentry pointer or
 * its inode differs, or the saved attributes no longer match, err is set
 * from au_busy_or_stale().
 * NOTE(review): the branch conditions, the handling of the looked-up
 * dentry and the return are not visible in this copy.
 */
static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
387
struct au_branch *br)
391
struct inode *h_inode;
393
struct super_block *h_sb;
396
memset(&ia, -1, sizeof(ia));
397
h_sb = h_dentry->d_sb;
398
h_inode = h_dentry->d_inode;
400
au_iattr_save(&ia, h_inode);
401
else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
402
/* nfs d_revalidate may return 0 for negative dentry */
403
/* fuse d_revalidate always return 0 for negative dentry */
406
/* main purpose is namei.c:cached_lookup() and d_revalidate */
407
h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
413
/* any difference from the earlier state is reported via au_busy_or_stale() */
if (unlikely(h_d != h_dentry
414
|| h_d->d_inode != h_inode
415
|| (h_inode && au_iattr_test(&ia, h_inode))))
416
err = au_busy_or_stale();
424
/*
 * Verify the lower dentry @h_dentry according to the @udba mode.
 * - AuOpt_UDBA_REVAL: err becomes 1 when the inode of h_dentry->d_parent
 *   is not @h_dir, 0 otherwise.
 * - AuOpt_UDBA_HINOTIFY: the check is delegated to au_h_verify_dentry().
 * NOTE(review): the initial value of err, any other statement in the REVAL
 * branch and the return are not visible in this copy.
 */
int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
425
struct dentry *h_parent, struct au_branch *br)
430
if (udba == AuOpt_UDBA_REVAL) {
432
err = (h_dentry->d_parent->d_inode != h_dir);
433
} else if (udba == AuOpt_UDBA_HINOTIFY)
434
err = au_h_verify_dentry(h_dentry, h_parent, br);
439
/* ---------------------------------------------------------------------- */
441
/*
 * Re-map the lower dentry array of @dinfo to the current branch indices of
 * @parent.  @p points at the first entry to process (the caller passes
 * di_hdentry + di_bstart).  The dinfo rwsem must be write-locked.
 *
 * Visible flow:
 * - for each index from di_bstart to di_bend the parent of the lower
 *   dentry is compared with the lower dentry of @parent at that index;
 *   on a mismatch au_find_dbindex() supplies the new index, and entries
 *   are swapped ("swap two lower dentries, and loop again").
 * - the whiteout and opaque-dir indices are tracked in bwh/bdiropq while
 *   entries move, then validated against au_sbend() and the whiteout
 *   capability of the branch before di_bdiropq is stored.
 * - di_bstart is recomputed by scanning upward from 0 and di_bend by
 *   scanning downward from au_dbend(parent).
 *
 * NOTE(review): many lines are missing from this copy (how h_d and sb are
 * obtained, the swap itself, the tests inside the two final scans, the
 * reference handling for dget_parent()).
 */
static void au_do_refresh_hdentry(struct au_hdentry *p, struct au_dinfo *dinfo,
442
struct dentry *parent)
444
struct dentry *h_d, *h_dp;
445
struct au_hdentry tmp, *q;
446
struct super_block *sb;
447
aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
449
AuRwMustWriteLock(&dinfo->di_rwsem);
451
bend = dinfo->di_bend;
453
bdiropq = dinfo->di_bdiropq;
454
for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
459
h_dp = dget_parent(h_d);
460
if (h_dp == au_h_dptr(parent, bindex)) {
465
new_bindex = au_find_dbindex(parent, h_dp);
467
if (dinfo->di_bwh == bindex)
469
if (dinfo->di_bdiropq == bindex)
470
bdiropq = new_bindex;
471
if (new_bindex < 0) {
477
/* swap two lower dentries, and loop again */
478
q = dinfo->di_hdentry + new_bindex;
490
if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
493
dinfo->di_bdiropq = -1;
495
&& bdiropq <= au_sbend(sb)
496
&& au_sbr_whable(sb, bdiropq))
497
dinfo->di_bdiropq = bdiropq;
499
/* recompute the first and the last index of the array */
bend = au_dbend(parent);
500
p = dinfo->di_hdentry;
501
for (bindex = 0; bindex <= bend; bindex++, p++)
503
dinfo->di_bstart = bindex;
507
p = dinfo->di_hdentry + bend;
508
for (bindex = bend; bindex >= 0; bindex--, p--)
510
dinfo->di_bend = bindex;
516
* returns the number of found lower positive dentries,
517
* otherwise an error.
519
/*
 * Refresh the lower dentries of @dentry; @type is the file type that is
 * passed on to au_lkup_dentry().
 * Preconditions asserted here: @dentry is write-locked (DiMustWriteLock),
 * is not the root, and the parent dentry and inode generations equal the
 * superblock generation.
 *
 * Visible flow: the lower dentry array is resized to au_sbend() + 1
 * entries, re-mapped by au_do_refresh_hdentry(), and, unless @type is not
 * a directory and di_bstart already equals the parent dbstart, looked up
 * again by au_lkup_dentry() starting at the parent dbstart.  The dentry
 * generation is updated with au_update_digen().
 *
 * NOTE(review): error handling, the action taken by the di_bwh test and
 * the return are not visible in this copy.
 */
int au_refresh_hdentry(struct dentry *dentry, mode_t type)
523
aufs_bindex_t bstart;
524
struct au_dinfo *dinfo;
525
struct super_block *sb;
526
struct dentry *parent;
528
DiMustWriteLock(dentry);
531
AuDebugOn(IS_ROOT(dentry));
532
sigen = au_sigen(sb);
533
parent = dget_parent(dentry);
534
AuDebugOn(au_digen(parent) != sigen
535
|| au_iigen(parent->d_inode) != sigen);
537
dinfo = au_di(dentry);
538
/* one slot per branch index, 0..au_sbend(sb) */
err = au_di_realloc(dinfo, au_sbend(sb) + 1);
542
au_do_refresh_hdentry(dinfo->di_hdentry + dinfo->di_bstart, dinfo,
546
bstart = au_dbstart(parent);
547
if (type != S_IFDIR && dinfo->di_bstart == bstart)
548
goto out_dgen; /* success */
550
npositive = au_lkup_dentry(dentry, bstart, type, /*nd*/NULL);
553
if (dinfo->di_bwh >= 0 && dinfo->di_bwh <= dinfo->di_bstart)
557
au_update_digen(dentry);
560
AuTraceErr(npositive);
564
/*
 * Call the d_revalidate operation of the lower dentry @h_dentry, which
 * belongs to branch @bindex of the aufs dentry @dentry.
 * When au_test_fs_null_nd() is true for the lower superblock the operation
 * is called with a NULL nameidata.  On the other path a local nameidata is
 * pointed at the lower dentry of the parent on @bindex and at the branch
 * mount, a path reference is held around the call, and the parent is
 * read-locked and unlocked around it.
 *
 * NOTE(review): nd->path.dentry is read on the line before "nd &&" is
 * tested; if @nd can be NULL on this path that dereference is unsafe --
 * confirm with the callers.
 * NOTE(review): the d_op NULL check, the conditions guarding the lock and
 * unlock, and the conversion of valid into the return value are not
 * visible in this copy.
 */
static noinline_for_stack
565
int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
566
struct dentry *dentry, aufs_bindex_t bindex)
569
int (*reval)(struct dentry *, struct nameidata *);
574
reval = h_dentry->d_op->d_revalidate;
578
AuDbg("b%d\n", bindex);
579
if (au_test_fs_null_nd(h_dentry->d_sb))
580
/* it may return tri-state */
581
valid = reval(h_dentry, NULL);
583
struct nameidata h_nd;
585
struct dentry *parent;
588
parent = nd->path.dentry;
589
locked = (nd && nd->path.dentry != dentry);
591
di_read_lock_parent(parent, AuLock_IR);
592
BUG_ON(bindex > au_dbend(parent));
593
h_nd.path.dentry = au_h_dptr(parent, bindex);
594
BUG_ON(!h_nd.path.dentry);
595
h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt;
596
/* keep the lower path referenced while the lower fs uses it */
path_get(&h_nd.path);
597
valid = reval(h_dentry, &h_nd);
598
path_put(&h_nd.path);
600
di_read_unlock(parent, AuLock_IR);
603
if (unlikely(valid < 0))
613
/* todo: remove this */
614
/*
 * Revalidate the lower dentries of @dentry, branch by branch.
 *
 * Visible flow:
 * - the unhashed state, root-ness and name of @dentry are sampled first.
 * - when @do_udba is set and @inode exists, the file type, an "nlink > 0"
 *   flag, the first lower inode and the inode branch range (ibs..ibe) are
 *   sampled as well.
 * - the loop runs from au_dbstart(dentry) to btail; for a directory inode
 *   btail is au_dbtaildir(dentry).
 * - per branch: the lower dentry is compared by unhashed state and name
 *   (as part of a condition whose first half is not visible), then
 *   au_do_h_d_reval() is called, then the lower inode is compared with the
 *   aufs side: presence of an inode, "nlink > 0", and identity with the
 *   cached lower inode from au_h_iptr() when @bindex is within ibs..ibe.
 *
 * NOTE(review): many lines are missing from this copy (default values, the
 * actions taken on each mismatch, the mode comparison, labels and the
 * return).
 */
static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
615
struct nameidata *nd, int do_udba)
618
umode_t mode, h_mode;
619
aufs_bindex_t bindex, btail, bstart, ibs, ibe;
620
unsigned char plus, unhashed, is_root, h_plus;
621
struct inode *first, *h_inode, *h_cached_inode;
622
struct dentry *h_dentry;
623
struct qstr *name, *h_name;
631
unhashed = !!d_unhashed(dentry);
632
is_root = !!IS_ROOT(dentry);
633
name = &dentry->d_name;
636
* Theoretically, REVAL test should be unnecessary in case of INOTIFY.
637
* But inotify doesn't fire some necessary events,
638
* IN_ATTRIB for atime/nlink/pageio
639
* IN_DELETE for NFS dentry
640
* Let's do REVAL test too.
642
if (do_udba && inode) {
643
mode = (inode->i_mode & S_IFMT);
644
plus = (inode->i_nlink > 0);
645
first = au_h_iptr(inode, au_ibstart(inode));
646
ibs = au_ibstart(inode);
647
ibe = au_ibend(inode);
650
bstart = au_dbstart(dentry);
652
if (inode && S_ISDIR(inode->i_mode))
653
btail = au_dbtaildir(dentry);
654
for (bindex = bstart; bindex <= btail; bindex++) {
655
h_dentry = au_h_dptr(dentry, bindex);
659
AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
660
h_name = &h_dentry->d_name;
663
&& (unhashed != !!d_unhashed(h_dentry)
664
|| name->len != h_name->len
665
|| memcmp(name->name, h_name->name, name->len))
667
AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
668
unhashed, d_unhashed(h_dentry),
669
AuDLNPair(dentry), AuDLNPair(h_dentry));
673
err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
675
/* do not goto err, to keep the errno */
678
/* todo: plink too? */
683
h_inode = h_dentry->d_inode;
684
if (unlikely(!!inode != !!h_inode))
689
h_cached_inode = h_inode;
691
h_mode = (h_inode->i_mode & S_IFMT);
692
h_plus = (h_inode->i_nlink > 0);
694
if (inode && ibs <= bindex && bindex <= ibe)
695
h_cached_inode = au_h_iptr(inode, bindex);
697
if (unlikely(plus != h_plus
699
|| h_cached_inode != h_inode))
711
/*
 * Refresh a single dentry whose parent is already up to date.
 * When both the dentry and its inode generations already equal @sigen the
 * early test is true.  Otherwise the parent is read-locked, its
 * generations are asserted to equal @sigen, and @dentry is refreshed with
 * au_refresh_hdentry() (using the file type of its inode) and
 * au_refresh_hinode().
 * NOTE(review): the action of the early test, the error checks between the
 * two refresh calls and the return are not visible in this copy.
 */
static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
714
struct dentry *parent;
717
inode = dentry->d_inode;
718
if (au_digen(dentry) == sigen && au_iigen(inode) == sigen)
721
parent = dget_parent(dentry);
722
di_read_lock_parent(parent, AuLock_IR);
723
AuDebugOn(au_digen(parent) != sigen
724
|| au_iigen(parent->d_inode) != sigen);
725
au_dbg_verify_gen(parent, sigen);
727
/* returns a number of positive dentries */
728
err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT);
730
err = au_refresh_hinode(inode, dentry);
732
di_read_unlock(parent, AuLock_IR);
737
/*
 * Bring @dentry up to the superblock generation @sigen.
 * When the FAILED_REFRESH_DIRS flag is not set on the superblock info the
 * work is done by simple_reval_dpath().  Otherwise a slow loop runs while
 * the dentry or its inode is out of date: it works on a dentry d, tests
 * whether the parent of d is already current, write-locks d, re-checks its
 * generations after the lock, and refreshes it with au_refresh_hdentry()
 * and au_refresh_hinode() under a read lock on the parent.
 * NOTE(review): how d is chosen and advanced, the unlock and reference
 * handling, the error checks and the return are not visible in this copy.
 */
int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
740
struct dentry *d, *parent;
743
if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIRS))
744
return simple_reval_dpath(dentry, sigen);
746
/* slow loop, keep it simple and stupid */
747
/* cf: au_cpup_dirs() */
750
while (au_digen(dentry) != sigen
751
|| au_iigen(dentry->d_inode) != sigen) {
755
parent = dget_parent(d);
756
if (au_digen(parent) == sigen
757
&& au_iigen(parent->d_inode) == sigen)
764
di_write_lock_child(d);
766
/* someone might update our dentry while we were sleeping */
767
if (au_digen(d) != sigen || au_iigen(d->d_inode) != sigen) {
768
di_read_lock_parent(parent, AuLock_IR);
769
/* returns a number of positive dentries */
770
err = au_refresh_hdentry(d, inode->i_mode & S_IFMT);
772
err = au_refresh_hinode(inode, d);
773
di_read_unlock(parent, AuLock_IR);
787
* if valid returns 1, otherwise 0.
789
/*
 * The d_revalidate operation installed in aufs_dop.
 *
 * Visible flow:
 * - takes the aufs read lock with AuLock_FLUSH | AuLock_DW.
 * - when the dentry generation differs from the superblock generation the
 *   path is refreshed with au_reval_dpath(); when only the inode
 *   generation differs the inode is refreshed with au_refresh_hinode().
 *   Both cases must not happen for the root dentry (AuDebugOn).
 * - the dentry lock is downgraded with AuLock_IR and the generations are
 *   asserted to be current.
 * - unless the UDBA_NONE option is set, the first lower inode is checked
 *   with au_test_higen() (part of a condition whose first half is not
 *   visible) and h_d_revalidate() checks the lower dentries; a negative
 *   dbstart afterwards is handled as "both of real entry and whiteout
 *   found".
 * - au_store_oflag() is called and the read lock is released.
 *
 * NOTE(review): the error checks, labels and return statements are not
 * visible in this copy.
 */
static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
793
unsigned char do_udba;
794
struct super_block *sb;
799
inode = dentry->d_inode;
800
aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW);
801
sigen = au_sigen(sb);
802
if (au_digen(dentry) != sigen) {
803
AuDebugOn(IS_ROOT(dentry));
805
err = au_reval_dpath(dentry, sigen);
808
AuDebugOn(au_digen(dentry) != sigen);
810
if (inode && au_iigen(inode) != sigen) {
811
AuDebugOn(IS_ROOT(dentry));
812
err = au_refresh_hinode(inode, dentry);
815
AuDebugOn(au_iigen(inode) != sigen);
817
di_downgrade_lock(dentry, AuLock_IR);
819
AuDebugOn(au_digen(dentry) != sigen);
820
AuDebugOn(inode && au_iigen(inode) != sigen);
822
/* the lower-side checks run unless the UDBA_NONE option is set */
do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
823
if (do_udba && inode) {
824
aufs_bindex_t bstart = au_ibstart(inode);
827
&& au_test_higen(inode, au_h_iptr(inode, bstart)))
831
err = h_d_revalidate(dentry, inode, nd, do_udba);
832
if (unlikely(!err && do_udba && au_dbstart(dentry) < 0))
833
/* both of real entry and whiteout found */
838
di_downgrade_lock(dentry, AuLock_IR);
840
au_store_oflag(nd, inode);
841
aufs_read_unlock(dentry, AuLock_IR);
845
AuDbg("%.*s invalid\n", AuDLNPair(dentry));
849
/*
 * The d_release operation installed in aufs_dop.
 * Takes the aufs private data from dentry->d_fsdata, walks the lower
 * dentry entries from di_bstart to di_bend, frees the di_hdentry array
 * with kfree(), destroys di_rwsem, hands the dinfo to
 * au_cache_free_dinfo() and finally calls au_hin_di_reinit().
 * NOTE(review): the guards around these steps and the body of the loop
 * are not visible in this copy.
 */
static void aufs_d_release(struct dentry *dentry)
851
struct au_dinfo *dinfo;
852
aufs_bindex_t bend, bindex;
854
dinfo = dentry->d_fsdata;
858
/* dentry may not be revalidated */
859
bindex = dinfo->di_bstart;
861
struct au_hdentry *p;
863
bend = dinfo->di_bend;
864
p = dinfo->di_hdentry + bindex;
865
/* one iteration per index in [di_bstart, di_bend] */
while (bindex++ <= bend) {
871
kfree(dinfo->di_hdentry);
872
AuRwDestroy(&dinfo->di_rwsem);
873
au_cache_free_dinfo(dinfo);
874
au_hin_di_reinit(dentry);
877
/*
 * Dentry operations table of aufs: d_revalidate is aufs_d_revalidate() and
 * d_release is aufs_d_release().  No other operation is set in the visible
 * initializer.
 */
struct dentry_operations aufs_dop = {
878
.d_revalidate = aufs_d_revalidate,
879
.d_release = aufs_d_release