2
* Copyright (c) 1982, 1986, 1989, 1993
3
* The Regents of the University of California. All rights reserved.
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
13
* 4. Neither the name of the University nor the names of its contributors
14
* may be used to endorse or promote products derived from this software
15
* without specific prior written permission.
17
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
* @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95
32
#include <sys/cdefs.h>
33
__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_inode.c,v 1.108.2.5.4.1 2010/02/10 00:26:20 kensmith Exp $");
35
#include "opt_quota.h"
37
#include <sys/param.h>
38
#include <sys/systm.h>
39
#include <sys/mount.h>
43
#include <sys/vnode.h>
44
#include <sys/malloc.h>
45
#include <sys/resourcevar.h>
46
#include <sys/vmmeter.h>
50
#include <vm/vm_extern.h>
51
#include <vm/vm_object.h>
53
#include <ufs/ufs/extattr.h>
54
#include <ufs/ufs/quota.h>
55
#include <ufs/ufs/ufsmount.h>
56
#include <ufs/ufs/inode.h>
57
#include <ufs/ufs/ufs_extern.h>
59
#include <ufs/ffs/fs.h>
60
#include <ufs/ffs/ffs_extern.h>
62
/*
 * Forward declaration of the file-local helper that releases the blocks
 * hanging off one indirect block.  Argument order, using the names from the
 * definition later in this file: ip, lbn, dbn, lastbn, level, countp.
 */
static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
63
ufs2_daddr_t, int, ufs2_daddr_t *);
66
* Update the access, modified, and inode change times as specified by the
67
* IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode
68
* to disk if the IN_MODIFIED flag is set (it may be set initially, or by
69
* the timestamp update). The IN_LAZYMOD flag is set to force a write
70
* later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs
71
* is currently being suspended (or is suspended) and vnode has been accessed.
72
* If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to
73
* reflect the presumably successful write, and if waitfor is set, then wait
74
* for the write to complete.
77
/*
 * ffs_update(vp, waitfor): copy the in-core dinode of vp's inode into its
 * slot in the on-disk inode block.
 *
 *   vp      - vnode being updated; asserted to be exclusively locked.
 *   waitfor - nonzero asks for the write to be waited on; the final write
 *             decision also tests DOINGASYNC(vp).
 *
 * Callers in this file treat the return value as an error code.
 */
ffs_update(vp, waitfor)
86
ASSERT_VOP_ELOCKED(vp, "ffs_update");
89
/* Inode is not marked IN_MODIFIED and the caller is not waiting. */
if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
91
/* Clear the flags that record a pending inode write. */
ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
96
* Ensure that uid and gid are correct. This is a temporary
97
* fix until fsck has been changed to do the update.
99
if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */
100
fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */
101
ip->i_din1->di_ouid = ip->i_uid; /* XXX */
102
ip->i_din1->di_ogid = ip->i_gid; /* XXX */
104
/* Read the filesystem block that contains this inode. */
error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
105
(int)fs->fs_bsize, NOCRED, &bp);
110
/*
 * With soft updates, let softdep process the inode block; otherwise the
 * effective and on-disk link counts are required to match.
 */
if (DOINGSOFTDEP(vp))
111
softdep_update_inodeblock(ip, bp, waitfor);
112
else if (ip->i_effnlink != ip->i_nlink)
113
panic("ffs_update: bad link cnt");
114
/*
 * Store the dinode at this inode's offset within the buffer, using the
 * UFS1 or UFS2 dinode layout.
 */
if (ip->i_ump->um_fstype == UFS1)
115
*((struct ufs1_dinode *)bp->b_data +
116
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
118
*((struct ufs2_dinode *)bp->b_data +
119
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
120
/*
 * Choose how the buffer is written: caller wants to wait on a non-async
 * mount, the system is short of pages or has too many dirty buffers, or
 * the remaining case, where a full-sized buffer is marked B_CLUSTEROK.
 * NOTE(review): the write calls themselves are not visible in this listing.
 */
if (waitfor && !DOINGASYNC(vp)) {
122
} else if (vm_page_count_severe() || buf_dirty_count_severe()) {
125
if (bp->b_bufsize == fs->fs_bsize)
126
bp->b_flags |= B_CLUSTEROK;
133
/*
 * ffs_pages_remove(vp, start, end): pass the page-index range start..end to
 * vm_object_page_remove() for vp's VM object, holding the object lock
 * around the call.  A vnode without a VM object is detected by the NULL
 * test below (the statement it guards is not visible in this listing).
 */
ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
137
if ((object = vp->v_object) == NULL)
139
VM_OBJECT_LOCK(object);
140
vm_object_page_remove(object, start, end, FALSE);
141
VM_OBJECT_UNLOCK(object);
144
/*
 * Indirection levels.  ffs_truncate uses them to index lastiblock[],
 * indir_lbn[] and the inode's i_ib[] array, and passes them as the level
 * argument of ffs_indirtrunc.
 */
#define SINGLE 0 /* index of single indirect block */
145
#define DOUBLE 1 /* index of double indirect block */
146
#define TRIPLE 2 /* index of triple indirect block */
148
* Truncate the inode ip to at most length size, freeing the disk blocks.
152
/*
 * ffs_truncate(vp, length, flags, cred, td): set the size of the file behind
 * vp to length, releasing blocks that are no longer needed.
 *
 *   vp     - vnode to operate on; asserted to be locked.
 *   length - requested size; checked against fs_maxfilesize and compared
 *            with the current size to decide between growing and shrinking.
 *   flags  - IO_EXT and/or IO_NORMAL select extended-attribute data and/or
 *            regular file data; when neither is set the traditional
 *            behaviour (normal data only) is assumed.
 *   cred   - credential handed to UFS_BALLOC and vtruncbuf.
 *   td     - thread handed to vinvalbuf and vtruncbuf.
 *
 * Phases visible in this listing, in order:
 *   1. extended-attribute data (IO_EXT with extblocks > 0);
 *   2. symlink special case and the "size unchanged" case;
 *   3. soft-updates handling (sync first, or hand blocks to softdep);
 *   4. growing the file (osize < length) via UFS_BALLOC;
 *   5. shrinking: clear the tail of the partial last block, compute
 *      lastblock/lastiblock[], write the inode with the dropped pointers
 *      cleared, restore the old pointers, then free indirect blocks,
 *      whole direct blocks and finally surplus frags of the last block;
 *   6. store the final size and block count and release quota.
 *
 * Several early exits return the result of ffs_update().  The visible
 * panic() calls cover partial truncation of extended data, partial
 * truncation of a symlink, a read-only filesystem, a bad newspace value and
 * the block-pointer checks that precede the "#endif INVARIANTS" marker.
 */
ffs_truncate(vp, length, flags, cred, td)
160
ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
161
ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
162
ufs2_daddr_t count, blocksreleased = 0, datablocks;
165
struct ufsmount *ump;
166
int needextclean, softdepslowdown, extblocks;
167
int offset, size, level, nblocks;
168
int i, error, allerror;
175
ASSERT_VOP_LOCKED(vp, "ffs_truncate");
180
* Historically clients did not have to specify which data
181
* they were truncating. So, if not specified, we assume
182
* traditional behavior, e.g., just the normal data.
184
if ((flags & (IO_EXT | IO_NORMAL)) == 0)
187
* If we are truncating the extended-attributes, and cannot
188
* do it with soft updates, then do it slowly here. If we are
189
* truncating both the extended attributes and the file contents
190
* (e.g., the file is being unlinked), then pick it off with
191
* soft updates below.
194
softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp);
196
/*
 * Start from the inode's block count; the UFS2 test below subtracts the
 * blocks occupied by extended attributes.
 */
datablocks = DIP(ip, i_blocks);
197
if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) {
198
extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
199
datablocks -= extblocks;
201
if ((flags & IO_EXT) && extblocks > 0) {
202
if (DOINGSOFTDEP(vp) && softdepslowdown == 0 && length == 0) {
203
if ((flags & IO_NORMAL) == 0) {
204
softdep_setup_freeblocks(ip, length, IO_EXT);
210
panic("ffs_truncate: partial trunc of extdata");
211
if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
213
osize = ip->i_din2->di_extsize;
214
ip->i_din2->di_blocks -= extblocks;
216
(void) chkdq(ip, -extblocks, NOCRED, 0);
218
vinvalbuf(vp, V_ALT, td, 0, 0);
220
OFF_TO_IDX(lblktosize(fs, -extblocks)), 0);
221
ip->i_din2->di_extsize = 0;
222
/* Save each extended-attribute block pointer and clear it in the dinode. */
for (i = 0; i < NXADDR; i++) {
223
oldblks[i] = ip->i_din2->di_extb[i];
224
ip->i_din2->di_extb[i] = 0;
226
ip->i_flag |= IN_CHANGE;
227
if ((error = ffs_update(vp, 1)))
229
/* With the inode updated, release the saved extended-attribute blocks. */
for (i = 0; i < NXADDR; i++) {
232
ffs_blkfree(ump, fs, ip->i_devvp, oldblks[i],
233
sblksize(fs, osize, i), ip->i_number);
237
if ((flags & IO_NORMAL) == 0)
239
if (length > fs->fs_maxfilesize)
241
if (vp->v_type == VLNK &&
242
(ip->i_size < vp->v_mount->mnt_maxsymlinklen ||
246
panic("ffs_truncate: partial truncate of symlink");
248
bzero(SHORTLINK(ip), (u_int)ip->i_size);
250
DIP_SET(ip, i_size, 0);
251
ip->i_flag |= IN_CHANGE | IN_UPDATE;
253
softdep_setup_freeblocks(ip, length, IO_EXT);
254
return (ffs_update(vp, 1));
256
if (ip->i_size == length) {
257
ip->i_flag |= IN_CHANGE | IN_UPDATE;
259
softdep_setup_freeblocks(ip, length, IO_EXT);
260
return (ffs_update(vp, 0));
263
panic("ffs_truncate: read-only filesystem");
265
error = getinoquota(ip);
269
if ((ip->i_flags & SF_SNAPSHOT) != 0)
271
vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
272
if (DOINGSOFTDEP(vp)) {
273
if (length > 0 || softdepslowdown) {
275
* If a file is only partially truncated, then
276
* we have to clean up the data structures
277
* describing the allocation past the truncation
278
* point. Finding and deallocating those structures
279
* is a lot of work. Since partial truncation occurs
280
* rarely, we solve the problem by syncing the file
281
* so that it will have no data structures left.
283
if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
286
if (ip->i_flag & IN_SPACECOUNTED)
287
fs->fs_pendingblocks -= datablocks;
291
(void) chkdq(ip, -datablocks, NOCRED, 0);
293
/*
 * Hand the blocks to soft updates; extended-attribute data is included
 * when needextclean is set.
 */
softdep_setup_freeblocks(ip, length, needextclean ?
294
IO_EXT | IO_NORMAL : IO_NORMAL);
295
ASSERT_VOP_LOCKED(vp, "ffs_truncate1");
296
vinvalbuf(vp, needextclean ? 0 : V_NORMAL, td, 0, 0);
298
ffs_pages_remove(vp, 0,
299
OFF_TO_IDX(lblktosize(fs, -extblocks)));
300
vnode_pager_setsize(vp, 0);
301
ip->i_flag |= IN_CHANGE | IN_UPDATE;
302
return (ffs_update(vp, 0));
307
* Lengthen the size of the file. We must ensure that the
308
* last byte of the file is allocated. Since the smallest
309
* value of osize is 0, length will be at least 1.
311
if (osize < length) {
312
vnode_pager_setsize(vp, length);
314
/* Allocate the block holding the new last byte (offset length - 1). */
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
316
vnode_pager_setsize(vp, osize);
320
DIP_SET(ip, i_size, length);
321
if (bp->b_bufsize == fs->fs_bsize)
322
bp->b_flags |= B_CLUSTEROK;
327
ip->i_flag |= IN_CHANGE | IN_UPDATE;
328
return (ffs_update(vp, 1));
331
* Shorten the size of the file. If the file is not being
332
* truncated to a block boundary, the contents of the
333
* partial block following the end of the file must be
334
* zero'ed in case it ever becomes accessible again because
335
* of subsequent file growth. Directories however are not
336
* zero'ed as they should grow back initialized to empty.
338
offset = blkoff(fs, length);
341
DIP_SET(ip, i_size, length);
343
lbn = lblkno(fs, length);
345
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
350
* When we are doing soft updates and the UFS_BALLOC
351
* above fills in a direct block hole with a full sized
352
* block that will be truncated down to a fragment below,
353
* we must flush out the block dependency with an FSYNC
354
* so that we do not get a soft updates inconsistency
355
* when we create the fragment below.
357
if (DOINGSOFTDEP(vp) && lbn < NDADDR &&
358
fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
359
(error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
362
DIP_SET(ip, i_size, length);
363
size = blksize(fs, ip, lbn);
364
if (vp->v_type != VDIR)
365
bzero((char *)bp->b_data + offset,
366
(u_int)(size - offset));
367
/* Kirk's code has reallocbuf(bp, size, 1) here */
369
if (bp->b_bufsize == fs->fs_bsize)
370
bp->b_flags |= B_CLUSTEROK;
377
* Calculate index into inode's block list of
378
* last direct and indirect blocks (if any)
379
* which we want to keep. Lastblock is -1 when
380
* the file is truncated to 0.
382
lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
383
lastiblock[SINGLE] = lastblock - NDADDR;
384
lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
385
lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
386
/* Size of one full filesystem block as computed by btodb(). */
nblocks = btodb(fs->fs_bsize);
388
* Update file and block pointers on disk before we start freeing
389
* blocks. If we crash before free'ing blocks below, the blocks
390
* will be returned to the free list. lastiblock values are also
391
* normalized to -1 for calls to ffs_indirtrunc below.
393
for (level = TRIPLE; level >= SINGLE; level--) {
394
oldblks[NDADDR + level] = DIP(ip, i_ib[level]);
395
if (lastiblock[level] < 0) {
396
DIP_SET(ip, i_ib[level], 0);
397
lastiblock[level] = -1;
400
for (i = 0; i < NDADDR; i++) {
401
oldblks[i] = DIP(ip, i_db[i]);
403
DIP_SET(ip, i_db[i], 0);
405
ip->i_flag |= IN_CHANGE | IN_UPDATE;
406
allerror = ffs_update(vp, 1);
409
* Having written the new inode to disk, save its new configuration
410
* and put back the old block pointers long enough to process them.
411
* Note that we save the new block configuration so we can check it
414
for (i = 0; i < NDADDR; i++) {
415
newblks[i] = DIP(ip, i_db[i]);
416
DIP_SET(ip, i_db[i], oldblks[i]);
418
for (i = 0; i < NIADDR; i++) {
419
newblks[NDADDR + i] = DIP(ip, i_ib[i]);
420
DIP_SET(ip, i_ib[i], oldblks[NDADDR + i]);
423
DIP_SET(ip, i_size, osize);
425
error = vtruncbuf(vp, cred, td, length, fs->fs_bsize);
426
if (error && (allerror == 0))
430
* Indirect blocks first.
432
indir_lbn[SINGLE] = -NDADDR;
433
indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
434
indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
435
for (level = TRIPLE; level >= SINGLE; level--) {
436
bn = DIP(ip, i_ib[level]);
438
error = ffs_indirtrunc(ip, indir_lbn[level],
439
fsbtodb(fs, bn), lastiblock[level], level, &count);
442
blocksreleased += count;
443
if (lastiblock[level] < 0) {
444
DIP_SET(ip, i_ib[level], 0);
445
ffs_blkfree(ump, fs, ip->i_devvp, bn,
446
fs->fs_bsize, ip->i_number);
447
blocksreleased += nblocks;
450
if (lastiblock[level] >= 0)
455
* All whole direct blocks or frags.
457
for (i = NDADDR - 1; i > lastblock; i--) {
460
bn = DIP(ip, i_db[i]);
463
DIP_SET(ip, i_db[i], 0);
464
bsize = blksize(fs, ip, i);
465
ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number);
466
blocksreleased += btodb(bsize);
472
* Finally, look for a change in size of the
473
* last direct block; release any frags.
475
bn = DIP(ip, i_db[lastblock]);
477
long oldspace, newspace;
480
* Calculate amount of space we're giving
481
* back as old block size minus new block size.
483
oldspace = blksize(fs, ip, lastblock);
485
DIP_SET(ip, i_size, length);
486
newspace = blksize(fs, ip, lastblock);
488
panic("ffs_truncate: newspace");
489
if (oldspace - newspace > 0) {
491
* Block number of space to be free'd is
492
* the old block # plus the number of frags
493
* required for the storage we're keeping.
495
bn += numfrags(fs, newspace);
496
ffs_blkfree(ump, fs, ip->i_devvp, bn,
497
oldspace - newspace, ip->i_number);
498
blocksreleased += btodb(oldspace - newspace);
503
for (level = SINGLE; level <= TRIPLE; level++)
504
if (newblks[NDADDR + level] != DIP(ip, i_ib[level]))
505
panic("ffs_truncate1");
506
for (i = 0; i < NDADDR; i++)
507
if (newblks[i] != DIP(ip, i_db[i]))
508
panic("ffs_truncate2");
511
(fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) &&
512
(vp->v_bufobj.bo_dirty.bv_cnt > 0 ||
513
vp->v_bufobj.bo_clean.bv_cnt > 0))
514
panic("ffs_truncate3");
516
#endif /* INVARIANTS */
518
* Put back the real size.
521
DIP_SET(ip, i_size, length);
522
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased);
524
if (DIP(ip, i_blocks) < 0) /* sanity */
525
DIP_SET(ip, i_blocks, 0);
526
ip->i_flag |= IN_CHANGE;
528
(void) chkdq(ip, -blocksreleased, NOCRED, 0);
534
* Release blocks associated with the inode ip and stored in the indirect
535
* block bn. Blocks are free'd in LIFO order up to (but not including)
536
* lastbn. If level is greater than SINGLE, the block is an indirect block
537
* and recursive calls to indirtrunc must be used to cleanse other indirect blocks.
541
/*
 * ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp): release the blocks
 * referenced from one indirect block of inode ip.
 *
 *   ip     - inode that owns the indirect block.
 *   lbn    - logical block number handed to getblk() for the indirect block.
 *   dbn    - device block address of the indirect block; callers pass
 *            fsbtodb(fs, bn).
 *   lastbn - last block to keep; -1 means the entire block is released.
 *   level  - SINGLE, DOUBLE or TRIPLE; above SINGLE the entries are
 *            themselves indirect blocks and are handled recursively.
 *   countp - out parameter; receives the running blocksreleased total.
 *
 * Callers treat the return value as an error code.
 */
ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
543
ufs2_daddr_t lbn, lastbn;
546
ufs2_daddr_t *countp;
549
struct fs *fs = ip->i_fs;
552
int i, nblocks, error = 0, allerror = 0;
553
ufs2_daddr_t nb, nlbn, last;
554
ufs2_daddr_t blkcount, factor, blocksreleased = 0;
555
ufs1_daddr_t *bap1 = NULL;
556
ufs2_daddr_t *bap2 = NULL;
557
/* Read entry i from whichever pointer array matches the filesystem type. */
# define BAP(ip, i) (((ip)->i_ump->um_fstype == UFS1) ? bap1[i] : bap2[i])
560
* Calculate index in current block of last
561
* block to be kept. -1 indicates the entire
562
* block so we need not calculate the index.
565
/* factor is multiplied by NINDIR(fs) once for each level above SINGLE. */
for (i = SINGLE; i < level; i++)
566
factor *= NINDIR(fs);
570
/* Size of one full filesystem block as computed by btodb(). */
nblocks = btodb(fs->fs_bsize);
572
* Get buffer of block pointers, zero those entries corresponding
573
* to blocks to be free'd, and update on disk copy first. Since
574
* double(triple) indirect before single(double) indirect, calls
575
* to bmap on these blocks will fail. However, we already have
576
* the on disk address, so we have to set the b_blkno field
577
* explicitly instead of letting bread do everything for us.
580
bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0, 0);
581
if ((bp->b_flags & B_CACHE) == 0) {
582
curthread->td_ru.ru_inblock++; /* pay for read */
583
bp->b_iocmd = BIO_READ;
584
bp->b_flags &= ~B_INVAL;
585
bp->b_ioflags &= ~BIO_ERROR;
586
if (bp->b_bcount > bp->b_bufsize)
587
panic("ffs_indirtrunc: bad buffer size");
589
vfs_busy_pages(bp, 0);
590
bp->b_iooffset = dbtob(bp->b_blkno);
600
if (ip->i_ump->um_fstype == UFS1)
601
bap1 = (ufs1_daddr_t *)bp->b_data;
603
bap2 = (ufs2_daddr_t *)bp->b_data;
605
MALLOC(copy, caddr_t, fs->fs_bsize, M_TEMP, M_WAITOK);
606
bcopy((caddr_t)bp->b_data, copy, (u_int)fs->fs_bsize);
607
for (i = last + 1; i < NINDIR(fs); i++)
608
if (ip->i_ump->um_fstype == UFS1)
612
if (DOINGASYNC(vp)) {
619
if (ip->i_ump->um_fstype == UFS1)
620
bap1 = (ufs1_daddr_t *)copy;
622
bap2 = (ufs2_daddr_t *)copy;
626
* Recursively free totally unused blocks.
628
for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
629
i--, nlbn += factor) {
633
if (level > SINGLE) {
634
if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
635
(ufs2_daddr_t)-1, level - 1, &blkcount)) != 0)
637
blocksreleased += blkcount;
639
ffs_blkfree(ip->i_ump, fs, ip->i_devvp, nb, fs->fs_bsize,
641
blocksreleased += nblocks;
645
* Recursively free last partial block.
647
if (level > SINGLE && lastbn >= 0) {
648
last = lastbn % factor;
651
error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
652
last, level - 1, &blkcount);
655
blocksreleased += blkcount;
661
bp->b_flags |= B_INVAL | B_NOCACHE;
665
*countp = blocksreleased;
670
/*
 * ffs_rdonly(ip): report whether the filesystem that holds inode ip has its
 * fs_ronly field set.  Evaluates to 1 when fs_ronly is nonzero, else 0.
 */
ffs_rdonly(struct inode *ip)
673
return (ip->i_ump->um_fs->fs_ronly != 0);