2
* Copyright (C) 2005-2009 Junjiro R. Okajima
4
* This program, aufs is free software; you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation; either version 2 of the License, or
7
* (at your option) any later version.
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
* file and vm operations
23
#include <linux/file.h>
24
#include <linux/fs_stack.h>
25
#include <linux/ima.h>
26
#include <linux/mman.h>
28
#include <linux/security.h>
31
/* common function to regular file and dir */
32
/*
 * Flush handler shared by regular files and directories.
 * Walks the lower files attached to @file, from au_fbstart() to au_fbend(),
 * and calls each one's own ->flush() with the same owner @id.  The loop
 * condition (!err) ends the walk as soon as err becomes non-zero.
 */
int aufs_flush(struct file *file, fl_owner_t id)
35
aufs_bindex_t bindex, bend;
36
struct dentry *dentry;
39
dentry = file->f_dentry;
40
/* si read lock on the superblock first, then the di read lock on the dentry */
si_noflush_read_lock(dentry->d_sb);
42
di_read_lock_child(dentry, AuLock_IW);
45
bend = au_fbend(file);
46
for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
47
h_file = au_h_fptr(file, bindex);
48
/* tests for an empty slot or a lower file without a ->flush method */
if (!h_file || !h_file->f_op || !h_file->f_op->flush)
51
err = h_file->f_op->flush(h_file, id);
53
vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
56
/* presumably refreshes times and sizes of the aufs inode -- TODO confirm */
au_cpup_attr_timesizes(dentry->d_inode);
58
/* locks are dropped in the reverse order of acquisition */
di_read_unlock(dentry, AuLock_IW);
60
si_read_unlock(dentry->d_sb);
64
/* ---------------------------------------------------------------------- */
66
/*
 * Open callback for non-directory files; handed to au_do_open() by
 * aufs_open_nondir().
 * Opens the lower file on the branch returned by au_dbstart(dentry) with
 * au_h_open() and records it as the only lower file of @file: fbstart and
 * fbend are both set to that same branch index.
 * The caller must hold the file info write lock (FiMustWriteLock).
 */
static int do_open_nondir(struct file *file, int flags)
71
struct dentry *dentry;
72
struct au_finfo *finfo;
74
FiMustWriteLock(file);
77
dentry = file->f_dentry;
79
/* no vm operations are attached yet; aufs_mmap() fills them in later */
finfo->fi_h_vm_ops = NULL;
80
finfo->fi_vm_ops = NULL;
81
bindex = au_dbstart(dentry);
82
/* O_TRUNC is processed already */
83
BUG_ON(au_test_ro(dentry->d_sb, bindex, dentry->d_inode)
84
&& (flags & O_TRUNC));
86
h_file = au_h_open(dentry, bindex, flags, file);
88
/* h_file may be an error pointer; err receives its PTR_ERR() value */
err = PTR_ERR(h_file);
90
au_set_fbstart(file, bindex);
91
au_set_fbend(file, bindex);
92
au_set_h_fptr(file, bindex, h_file);
93
au_update_figen(file);
94
/* todo: necessary? */
95
/* file->f_ra = h_file->f_ra; */
100
/*
 * ->open handler for non-directory files (installed in aufs_file_fop).
 * @inode is unused; the work is delegated to au_do_open() with
 * do_open_nondir as the callback.
 */
static int aufs_open_nondir(struct inode *inode __maybe_unused,
103
return au_do_open(file, do_open_nondir);
106
/*
 * ->release handler for non-directory files (installed in aufs_file_fop).
 * @inode is unused.  Under the si read lock, the fi_vm_ops pointer stored
 * in the file's au_finfo is freed; au_custom_vm_ops() is the place where
 * it gets allocated with kmemdup().
 */
static int aufs_release_nondir(struct inode *inode __maybe_unused,
109
struct super_block *sb = file->f_dentry->d_sb;
111
si_noflush_read_lock(sb);
112
/* fi_vm_ops is NULL unless au_custom_vm_ops() allocated it; kfree(NULL) is a no-op */
kfree(au_fi(file)->fi_vm_ops);
118
/* ---------------------------------------------------------------------- */
120
/*
 * ->read handler.
 * Takes the si read lock, revalidates and locks the file/dentry info via
 * au_reval_and_lock_fdi() (wlock = 0), then reads from the lower file at
 * au_fbstart(file) through vfsub_read_u().  Afterwards the access time of
 * the lower inode is copied to the aufs inode.
 */
static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
124
struct dentry *dentry;
126
struct super_block *sb;
128
dentry = file->f_dentry;
130
si_read_lock(sb, AuLock_FLUSH);
131
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
135
h_file = au_h_fptr(file, au_fbstart(file));
136
/* err carries the result of the lower read */
err = vfsub_read_u(h_file, buf, count, ppos);
137
/* todo: necessary? */
138
/* file->f_ra = h_file->f_ra; */
139
fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
141
/* release the dentry info lock, then the file info lock */
di_read_unlock(dentry, AuLock_IR);
142
fi_read_unlock(file);
148
/*
 * ->write handler.
 * Serialized by the aufs inode's i_mutex.  The file/dentry info is
 * revalidated and write-locked (wlock = 1), au_ready_to_write() prepares
 * the file for writing, and the dentry info lock is then downgraded before
 * the data is written to the lower file at au_fbstart(file) through
 * vfsub_write_u().  After the write the aufs inode is refreshed from the
 * lower inode (au_cpup_attr_timesizes() and i_mode).
 */
static ssize_t aufs_write(struct file *file, const char __user *ubuf,
149
size_t count, loff_t *ppos)
152
aufs_bindex_t bstart;
154
struct dentry *dentry;
156
struct super_block *sb;
158
/* drops the const qualifier of ubuf; buf is what vfsub_write_u() receives */
char __user *buf = (char __user *)ubuf;
160
dentry = file->f_dentry;
162
inode = dentry->d_inode;
163
mutex_lock(&inode->i_mutex);
164
si_read_lock(sb, AuLock_FLUSH);
166
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
170
err = au_ready_to_write(file, -1, &pin);
171
/* the write lock on the dentry info is only needed up to this point */
di_downgrade_lock(dentry, AuLock_IR);
175
bstart = au_fbstart(file);
176
h_file = au_h_fptr(file, bstart);
178
err = vfsub_write_u(h_file, buf, count, ppos);
179
au_cpup_attr_timesizes(inode);
180
/* mirror the lower inode's mode on the aufs inode after the write */
inode->i_mode = h_file->f_dentry->d_inode->i_mode;
183
di_read_unlock(dentry, AuLock_IR);
184
fi_write_unlock(file);
187
mutex_unlock(&inode->i_mutex);
191
/*
 * ->aio_read handler.
 * After revalidation (wlock = 0) the request is forwarded to the lower
 * file's own ->aio_read(): security_file_permission(MAY_READ) is checked
 * against the lower file, kio->ki_filp is pointed at the lower file, and
 * the lower method is called with the unchanged @iov, @nv and @pos.
 * The access time of the lower inode is then copied to the aufs inode.
 */
static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
192
unsigned long nv, loff_t pos)
195
struct file *file, *h_file;
196
struct dentry *dentry;
197
struct super_block *sb;
200
dentry = file->f_dentry;
202
si_read_lock(sb, AuLock_FLUSH);
203
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
208
h_file = au_h_fptr(file, au_fbstart(file));
209
if (h_file->f_op && h_file->f_op->aio_read) {
210
err = security_file_permission(h_file, MAY_READ);
213
if (!is_sync_kiocb(kio)) {
217
/* the lower ->aio_read() sees the lower file as the kiocb's file */
kio->ki_filp = h_file;
218
err = h_file->f_op->aio_read(kio, iov, nv, pos);
219
/* todo: necessary? */
220
/* file->f_ra = h_file->f_ra; */
221
fsstack_copy_attr_atime(dentry->d_inode,
222
h_file->f_dentry->d_inode);
224
/* currently there is no such fs */
225
/* NOTE(review): presumably the fallback for a lower file without ->aio_read -- confirm */
WARN_ON_ONCE(h_file->f_op && h_file->f_op->read);
228
di_read_unlock(dentry, AuLock_IR);
229
fi_read_unlock(file);
235
/*
 * ->aio_write handler.
 * Serialized by the aufs inode's i_mutex.  After revalidation with the
 * write lock (wlock = 1) and au_ready_to_write(), the dentry info lock is
 * downgraded and the request is forwarded to the lower file's own
 * ->aio_write(): security_file_permission(MAY_WRITE) is checked against
 * the lower file and kio->ki_filp is pointed at it before the call.
 * Afterwards the aufs inode is refreshed from the lower inode
 * (au_cpup_attr_timesizes() and i_mode).
 */
static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
236
unsigned long nv, loff_t pos)
239
aufs_bindex_t bstart;
241
struct dentry *dentry;
243
struct super_block *sb;
244
struct file *file, *h_file;
247
dentry = file->f_dentry;
249
inode = dentry->d_inode;
250
mutex_lock(&inode->i_mutex);
251
si_read_lock(sb, AuLock_FLUSH);
253
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
257
err = au_ready_to_write(file, -1, &pin);
258
di_downgrade_lock(dentry, AuLock_IR);
263
bstart = au_fbstart(file);
264
h_file = au_h_fptr(file, bstart);
266
if (h_file->f_op && h_file->f_op->aio_write) {
267
err = security_file_permission(h_file, MAY_WRITE);
270
if (!is_sync_kiocb(kio)) {
274
/* the lower ->aio_write() sees the lower file as the kiocb's file */
kio->ki_filp = h_file;
275
err = h_file->f_op->aio_write(kio, iov, nv, pos);
276
au_cpup_attr_timesizes(inode);
277
inode->i_mode = h_file->f_dentry->d_inode->i_mode;
279
/* currently there is no such fs */
280
/* NOTE(review): presumably the fallback for a lower file without ->aio_write -- confirm */
WARN_ON_ONCE(h_file->f_op && h_file->f_op->write);
283
di_read_unlock(dentry, AuLock_IR);
284
fi_write_unlock(file);
287
mutex_unlock(&inode->i_mutex);
291
/*
 * ->splice_read handler.
 * After revalidation (wlock = 0) the data is spliced from the lower file
 * at au_fbstart(file) into @pipe through vfsub_splice_to().  When
 * au_test_loopback_kthread() is true, file->f_mapping is first replaced by
 * the lower file's mapping.  The access time of the lower inode is copied
 * to the aufs inode afterwards.
 */
static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
292
struct pipe_inode_info *pipe, size_t len,
297
struct dentry *dentry;
298
struct super_block *sb;
300
dentry = file->f_dentry;
302
si_read_lock(sb, AuLock_FLUSH);
303
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
308
h_file = au_h_fptr(file, au_fbstart(file));
309
if (au_test_loopback_kthread()) {
310
file->f_mapping = h_file->f_mapping;
311
smp_mb(); /* unnecessary? */
313
err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
314
/* todo: necessary? */
315
/* file->f_ra = h_file->f_ra; */
316
fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
318
di_read_unlock(dentry, AuLock_IR);
319
fi_read_unlock(file);
327
/*
 * ->splice_write handler.
 * Serialized by the aufs inode's i_mutex.  After revalidation with the
 * write lock (wlock = 1) and au_ready_to_write(), the dentry info lock is
 * downgraded and the data is spliced from @pipe into the lower file at
 * au_fbstart(file) through vfsub_splice_from().  Afterwards the aufs inode
 * is refreshed from the lower inode (au_cpup_attr_timesizes() and i_mode).
 */
aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
328
size_t len, unsigned int flags)
332
struct dentry *dentry;
334
struct super_block *sb;
337
dentry = file->f_dentry;
338
inode = dentry->d_inode;
339
mutex_lock(&inode->i_mutex);
341
si_read_lock(sb, AuLock_FLUSH);
343
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
347
err = au_ready_to_write(file, -1, &pin);
348
di_downgrade_lock(dentry, AuLock_IR);
352
h_file = au_h_fptr(file, au_fbstart(file));
354
err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
355
au_cpup_attr_timesizes(inode);
356
/* mirror the lower inode's mode on the aufs inode after the write */
inode->i_mode = h_file->f_dentry->d_inode->i_mode;
359
di_read_unlock(dentry, AuLock_IR);
360
fi_write_unlock(file);
363
mutex_unlock(&inode->i_mutex);
367
/* ---------------------------------------------------------------------- */
369
/*
 * Helper for the vm handlers, which wait on it with
 * wait_event(wq, (file = au_safe_file(vma))) and so proceed only once it
 * returns a non-NULL file.
 * The test below checks that the file has private_data and that its
 * superblock belongs to aufs (au_test_aufs()).
 * NOTE(review): presumably the vma's file is returned when the test
 * passes and NULL otherwise -- confirm.
 */
static struct file *au_safe_file(struct vm_area_struct *vma)
374
if (file->private_data && au_test_aufs(file->f_dentry->d_sb))
379
/*
 * Called by the vm handlers right after the lower vm operation returns;
 * those handlers set vma->vm_file to the lower file just before the call.
 * NOTE(review): presumably this puts @file back into vma->vm_file --
 * confirm.
 */
static void au_reset_file(struct vm_area_struct *vma, struct file *file)
382
/* smp_mb(); */ /* flush vm_file */
385
/*
 * ->fault handler of aufs_vm_ops.
 * Waits until au_safe_file() yields a file for @vma, then forwards the
 * fault to the ->fault() of the lower vm operations saved in
 * finfo->fi_h_vm_ops.  For the duration of the call vma->vm_file points at
 * the lower file (the one at fi_bstart); the switch, the call and
 * au_reset_file() all happen under finfo->fi_vm_mtx.
 */
static int aufs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
388
static DECLARE_WAIT_QUEUE_HEAD(wq);
389
struct file *file, *h_file;
390
struct au_finfo *finfo;
392
/* todo: non-robr mode, use vm_file as it is? */
393
wait_event(wq, (file = au_safe_file(vma)));
395
/* do not revalidate, no si lock */
397
/* lower file of the first branch recorded in the file info */
h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
398
AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
400
mutex_lock(&finfo->fi_vm_mtx);
401
vma->vm_file = h_file;
402
err = finfo->fi_h_vm_ops->fault(vma, vmf);
403
/* todo: necessary? */
404
/* file->f_ra = h_file->f_ra; */
405
au_reset_file(vma, file);
406
mutex_unlock(&finfo->fi_vm_mtx);
407
#if 0 /* def CONFIG_SMP */
408
/* wake_up_nr(&wq, online_cpu - 1); */
417
/*
 * ->page_mkwrite handler; au_custom_vm_ops() installs it in the per-file
 * copy of the vm operations when the lower ops have ->page_mkwrite.
 * Waits until au_safe_file() yields a file for @vma, then calls the lower
 * ->page_mkwrite() with vma->vm_file pointing at the lower file; the
 * switch, the call and au_reset_file() happen under finfo->fi_vm_mtx.
 */
static int aufs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
420
static DECLARE_WAIT_QUEUE_HEAD(wq);
421
struct file *file, *h_file;
422
struct au_finfo *finfo;
424
wait_event(wq, (file = au_safe_file(vma)));
427
/* lower file of the first branch recorded in the file info */
h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
428
AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
430
mutex_lock(&finfo->fi_vm_mtx);
431
vma->vm_file = h_file;
432
err = finfo->fi_h_vm_ops->page_mkwrite(vma, vmf);
433
au_reset_file(vma, file);
434
mutex_unlock(&finfo->fi_vm_mtx);
440
/*
 * ->close handler; au_custom_vm_ops() installs it in the per-file copy of
 * the vm operations.
 * Waits until au_safe_file() yields a file for @vma, then calls the lower
 * ->close() with vma->vm_file pointing at the lower file; the switch, the
 * call and au_reset_file() happen under finfo->fi_vm_mtx.
 */
static void aufs_vm_close(struct vm_area_struct *vma)
442
static DECLARE_WAIT_QUEUE_HEAD(wq);
443
struct file *file, *h_file;
444
struct au_finfo *finfo;
446
wait_event(wq, (file = au_safe_file(vma)));
449
/* lower file of the first branch recorded in the file info */
h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
450
AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
452
mutex_lock(&finfo->fi_vm_mtx);
453
vma->vm_file = h_file;
454
finfo->fi_h_vm_ops->close(vma);
455
au_reset_file(vma, file);
456
mutex_unlock(&finfo->fi_vm_mtx);
460
/*
 * Default vm operations that aufs_mmap() installs on a vma.
 * au_custom_vm_ops() duplicates this table with kmemdup() and sets
 * .page_mkwrite and .close on the copy.
 */
static struct vm_operations_struct aufs_vm_ops = {
461
/* .close and .page_mkwrite are not set by default */
465
/* ---------------------------------------------------------------------- */
467
/*
 * Converts vma flags for au_vm_ops(), which passes the result to
 * ima_file_mmap() as its second argument.
 * Only the VM_WRITE test is shown below.
 * NOTE(review): presumably maps VM_* bits to the matching PROT_* bits --
 * confirm.
 */
static unsigned long au_prot_conv(unsigned long flags)
474
if (flags & VM_WRITE)
481
/*
 * Obtains the vm operations that the lower filesystem would use for @vma.
 * The lower file's ->mmap() is called on @vma, the vma->vm_ops it
 * installed is captured, and the range is unmapped again with do_munmap().
 * Returns the captured vm operations, or an ERR_PTR():
 *   -ENODEV is the initial value, set before the test for a lower file
 *           without f_op or without ->mmap;
 *   err     from ima_file_mmap() or from the lower ->mmap();
 *   -EIO    set after the AuIOErr() report about the internal unmapping.
 */
static struct vm_operations_struct *au_vm_ops(struct file *h_file,
482
struct vm_area_struct *vma)
484
struct vm_operations_struct *vm_ops;
487
vm_ops = ERR_PTR(-ENODEV);
488
if (!h_file->f_op || !h_file->f_op->mmap)
491
err = ima_file_mmap(h_file, au_prot_conv(vma->vm_flags));
492
vm_ops = ERR_PTR(err);
496
err = h_file->f_op->mmap(h_file, vma);
497
vm_ops = ERR_PTR(err);
501
/* oops, it became 'const' */
502
/* the cast drops the const qualifier of vma->vm_ops */
vm_ops = (struct vm_operations_struct *)vma->vm_ops;
503
/* undo the mapping created by the lower ->mmap() above */
err = do_munmap(current->mm, vma->vm_start,
504
vma->vm_end - vma->vm_start);
506
AuIOErr("failed internal unmapping %.*s, %d\n",
507
AuDLNPair(h_file->f_dentry), err);
508
vm_ops = ERR_PTR(-EIO);
515
/*
 * Builds a per-file copy of aufs_vm_ops for files whose lower vm
 * operations (finfo->fi_h_vm_ops) provide ->page_mkwrite or ->close.
 * The copy is allocated with kmemdup(GFP_NOFS), stored in
 * finfo->fi_vm_ops, given the aufs wrappers for those methods and
 * installed as vma->vm_ops.
 * The caller must hold finfo->fi_rwsem (AuRwMustAnyLock).
 */
static int au_custom_vm_ops(struct au_finfo *finfo, struct vm_area_struct *vma)
518
struct vm_operations_struct *h_ops;
520
AuRwMustAnyLock(&finfo->fi_rwsem);
523
h_ops = finfo->fi_h_vm_ops;
525
/* first operand: the lower ops provide neither of the optional methods */
if ((!h_ops->page_mkwrite && !h_ops->close)
530
finfo->fi_vm_ops = kmemdup(&aufs_vm_ops, sizeof(aufs_vm_ops), GFP_NOFS);
531
/* allocation failure check */
if (unlikely(!finfo->fi_vm_ops))
535
if (h_ops->page_mkwrite)
536
finfo->fi_vm_ops->page_mkwrite = aufs_page_mkwrite;
538
finfo->fi_vm_ops->close = aufs_vm_close;
540
vma->vm_ops = finfo->fi_vm_ops;
546
/*
 * ->mmap handler.
 * Visible sequence:
 *  - wlock is computed: true only for a file opened for writing that is
 *    being mapped with VM_SHARED;
 *  - the file/dentry info is revalidated and write-locked, and mmapped
 *    records what au_test_mmapped() reports for the file;
 *  - au_ready_to_write() is called and the dentry info lock is downgraded;
 *  - for a file not yet mmapped whose lower superblock is flagged by
 *    au_test_fs_bad_mapping(), file->f_mapping is replaced by the lower
 *    file's mapping;
 *  - au_vm_ops() obtains the lower vm operations, generic_file_mmap() sets
 *    up the mapping on the aufs file and vma->vm_ops becomes aufs_vm_ops;
 *  - the lower vm operations are saved in finfo->fi_h_vm_ops, fi_vm_mtx is
 *    initialized and au_custom_vm_ops() may install a per-file copy;
 *  - the lower file is marked accessed and its inode's atime is copied to
 *    the aufs inode.
 */
static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
549
unsigned char wlock, mmapped;
550
struct dentry *dentry;
551
struct super_block *sb;
553
struct vm_operations_struct *vm_ops;
555
dentry = file->f_dentry;
556
wlock = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
558
si_read_lock(sb, AuLock_FLUSH);
559
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
563
mmapped = !!au_test_mmapped(file);
567
err = au_ready_to_write(file, -1, &pin);
568
di_downgrade_lock(dentry, AuLock_IR);
573
/* NOTE(review): second downgrade presumably sits on a different branch from the one above -- confirm */
di_downgrade_lock(dentry, AuLock_IR);
575
h_file = au_h_fptr(file, au_fbstart(file));
576
if (!mmapped && au_test_fs_bad_mapping(h_file->f_dentry->d_sb)) {
578
* by this assignment, f_mapping will differs from aufs inode
580
* if someone else mixes the use of f_dentry->d_inode and
581
* f_mapping->host, then a problem may arise.
583
file->f_mapping = h_file->f_mapping;
588
vm_ops = au_vm_ops(h_file, vma);
589
/* au_vm_ops() may return an error pointer; err receives its PTR_ERR() value */
err = PTR_ERR(vm_ops);
595
* unnecessary to handle MAP_DENYWRITE and deny_write_access()?
596
* currently MAP_DENYWRITE from userspace is ignored, but elf loader
597
* sets it. when FMODE_EXEC is set (by open_exec() or sys_uselib()),
598
* both of the aufs file and the lower file is deny_write_access()-ed.
599
* finally I hope we can skip handlling MAP_DENYWRITE here.
601
err = generic_file_mmap(file, vma);
605
vma->vm_ops = &aufs_vm_ops;
607
struct au_finfo *finfo = au_fi(file);
609
/* remember the lower vm operations for the aufs vm handlers */
finfo->fi_h_vm_ops = vm_ops;
610
mutex_init(&finfo->fi_vm_mtx);
613
err = au_custom_vm_ops(au_fi(file), vma);
617
vfsub_file_accessed(h_file);
618
fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
621
di_read_unlock(dentry, AuLock_IR);
622
fi_write_unlock(file);
628
/* ---------------------------------------------------------------------- */
630
/*
 * ->fsync handler for non-directory files.
 * The caller holds the i_mutex of file->f_mapping->host (IMustLock).  When
 * that host is not the aufs inode, the mutexes are swapped on entry (drop
 * the host's, take the aufs inode's) and swapped back before returning.
 * err starts as 0 ahead of the test for a file not opened for writing.
 * Otherwise, after revalidation with the write lock and
 * au_ready_to_write(), the lower file's ->fsync() is called under the
 * lower inode's i_mutex, and the aufs inode is refreshed with
 * au_cpup_attr_timesizes().
 */
static int aufs_fsync_nondir(struct file *file, struct dentry *dentry,
637
struct super_block *sb;
639
inode = dentry->d_inode;
640
IMustLock(file->f_mapping->host);
641
if (inode != file->f_mapping->host) {
642
mutex_unlock(&file->f_mapping->host->i_mutex);
643
mutex_lock(&inode->i_mutex);
648
si_read_lock(sb, AuLock_FLUSH);
650
err = 0; /* -EBADF; */ /* posix? */
651
if (unlikely(!(file->f_mode & FMODE_WRITE)))
653
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
657
err = au_ready_to_write(file, -1, &pin);
658
di_downgrade_lock(dentry, AuLock_IR);
664
h_file = au_h_fptr(file, au_fbstart(file));
665
if (h_file->f_op && h_file->f_op->fsync) {
670
* no filemap_fdatawrite() since aufs file has no its own
673
h_d = h_file->f_dentry;
674
h_mtx = &h_d->d_inode->i_mutex;
675
/* the lower ->fsync() runs with the lower inode's i_mutex held */
mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
676
err = h_file->f_op->fsync(h_file, h_d, datasync);
678
vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
680
au_cpup_attr_timesizes(inode);
685
di_read_unlock(dentry, AuLock_IR);
686
fi_write_unlock(file);
689
/* restore the mutex state the caller expects */
if (inode != file->f_mapping->host) {
690
mutex_unlock(&inode->i_mutex);
691
mutex_lock(&file->f_mapping->host->i_mutex);
696
/* no one supports this operation, currently */
698
/*
 * ->aio_fsync handler for non-directory files; its entry in aufs_file_fop
 * is commented out.
 * Serialized by the aufs inode's i_mutex.  err starts as 0 ahead of the
 * test for a file not opened for writing.  Otherwise, after revalidation
 * with the write lock and au_ready_to_write(), kio->ki_filp is pointed at
 * the lower file and its ->aio_fsync() is called.  The lower inode's
 * i_mutex is taken after that call, ahead of vfsub_update_h_iattr(), and
 * the aufs inode is refreshed with au_cpup_attr_timesizes().
 */
static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
702
struct dentry *dentry;
704
struct file *file, *h_file;
705
struct super_block *sb;
708
dentry = file->f_dentry;
709
inode = dentry->d_inode;
710
mutex_lock(&inode->i_mutex);
713
si_read_lock(sb, AuLock_FLUSH);
715
err = 0; /* -EBADF; */ /* posix? */
716
if (unlikely(!(file->f_mode & FMODE_WRITE)))
718
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
722
err = au_ready_to_write(file, -1, &pin);
723
di_downgrade_lock(dentry, AuLock_IR);
729
h_file = au_h_fptr(file, au_fbstart(file));
730
if (h_file->f_op && h_file->f_op->aio_fsync) {
734
h_d = h_file->f_dentry;
735
h_mtx = &h_d->d_inode->i_mutex;
736
if (!is_sync_kiocb(kio)) {
740
/* the lower ->aio_fsync() sees the lower file as the kiocb's file */
kio->ki_filp = h_file;
741
err = h_file->f_op->aio_fsync(kio, datasync);
742
mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
744
vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
746
au_cpup_attr_timesizes(inode);
751
di_read_unlock(dentry, AuLock_IR);
752
fi_write_unlock(file);
755
mutex_unlock(&inode->i_mutex);
760
/*
 * ->fasync handler.
 * After revalidation (wlock = 0), forwards @fd and @flag to the
 * ->fasync() of the lower file at au_fbstart(file), when that lower file
 * provides one.
 */
static int aufs_fasync(int fd, struct file *file, int flag)
764
struct dentry *dentry;
765
struct super_block *sb;
767
dentry = file->f_dentry;
769
si_read_lock(sb, AuLock_FLUSH);
770
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
774
h_file = au_h_fptr(file, au_fbstart(file));
775
/* the lower method is optional */
if (h_file->f_op && h_file->f_op->fasync)
776
err = h_file->f_op->fasync(fd, h_file, flag);
778
di_read_unlock(dentry, AuLock_IR);
779
fi_read_unlock(file);
786
/* ---------------------------------------------------------------------- */
788
/* no one supports this operation, currently */
790
/*
 * ->sendpage handler; its entry in aufs_file_fop is commented out.
 * NOTE(review): the body is not shown here -- behaviour to be confirmed.
 */
static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
791
size_t len, loff_t *pos , int more)
796
/* ---------------------------------------------------------------------- */
798
/*
 * File operations table wiring up the aufs handlers for non-directory
 * files.  The .llseek, .aio_fsync and .sendpage entries are commented
 * out.
 */
const struct file_operations aufs_file_fop = {
800
* while generic_file_llseek/_unlocked() don't use BKL,
801
* don't use it since it operates file->f_mapping->host.
802
* in aufs, it may be a real file and may confuse users by UDBA.
804
/* .llseek = generic_file_llseek, */
808
.aio_read = aufs_aio_read,
809
.aio_write = aufs_aio_write,
810
#ifdef CONFIG_AUFS_POLL
813
.unlocked_ioctl = aufs_ioctl_nondir,
815
.open = aufs_open_nondir,
817
.release = aufs_release_nondir,
818
.fsync = aufs_fsync_nondir,
819
/* .aio_fsync = aufs_aio_fsync_nondir, */
820
.fasync = aufs_fasync,
821
/* .sendpage = aufs_sendpage, */
822
.splice_write = aufs_splice_write,
823
.splice_read = aufs_splice_read,
825
.aio_splice_write = aufs_aio_splice_write,
826
.aio_splice_read = aufs_aio_splice_read