2
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
4
* The soft updates code is derived from the appendix of a University
5
* of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
6
* "Soft Updates: A Solution to the Metadata Update Problem in File
7
* Systems", CSE-TR-254-95, August 1995).
9
* Further information about soft updates can be obtained from:
11
* Marshall Kirk McKusick http://www.mckusick.com/softdep/
12
* 1614 Oxford Street mckusick@mckusick.com
13
* Berkeley, CA 94709-1608 +1-510-843-9542
16
* Redistribution and use in source and binary forms, with or without
17
* modification, are permitted provided that the following conditions
20
* 1. Redistributions of source code must retain the above copyright
21
* notice, this list of conditions and the following disclaimer.
22
* 2. Redistributions in binary form must reproduce the above copyright
23
* notice, this list of conditions and the following disclaimer in the
24
* documentation and/or other materials provided with the distribution.
26
* THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
27
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
28
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29
* DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
30
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38
* from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00
41
#include <sys/cdefs.h>
42
__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_softdep.c,v 1.211.2.9.2.1 2010/02/10 00:26:20 kensmith Exp $");
48
* For now we want the safety net that DEBUG flags provide.
54
#include <sys/param.h>
55
#include <sys/kernel.h>
56
#include <sys/systm.h>
60
#include <sys/kthread.h>
62
#include <sys/malloc.h>
63
#include <sys/mount.h>
64
#include <sys/mutex.h>
67
#include <sys/sysctl.h>
68
#include <sys/syslog.h>
69
#include <sys/vnode.h>
71
#include <ufs/ufs/dir.h>
72
#include <ufs/ufs/extattr.h>
73
#include <ufs/ufs/quota.h>
74
#include <ufs/ufs/inode.h>
75
#include <ufs/ufs/ufsmount.h>
76
#include <ufs/ffs/fs.h>
77
#include <ufs/ffs/softdep.h>
78
#include <ufs/ffs/ffs_extern.h>
79
#include <ufs/ufs/ufs_extern.h>
84
#include "opt_quota.h"
89
softdep_flushfiles(oldmnt, flags, td)
95
panic("softdep_flushfiles called");
99
softdep_mount(devvp, mp, fs, cred)
117
softdep_uninitialize()
124
softdep_setup_inomapdep(bp, ip, newinum)
130
panic("softdep_setup_inomapdep called");
134
softdep_setup_blkmapdep(bp, mp, newblkno)
137
ufs2_daddr_t newblkno;
140
panic("softdep_setup_blkmapdep called");
144
softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
147
ufs2_daddr_t newblkno;
148
ufs2_daddr_t oldblkno;
154
panic("softdep_setup_allocdirect called");
158
softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
161
ufs2_daddr_t newblkno;
162
ufs2_daddr_t oldblkno;
168
panic("softdep_setup_allocext called");
172
softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
177
ufs2_daddr_t newblkno;
178
ufs2_daddr_t oldblkno;
182
panic("softdep_setup_allocindir_page called");
186
softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno)
191
ufs2_daddr_t newblkno;
194
panic("softdep_setup_allocindir_meta called");
198
softdep_setup_freeblocks(ip, length, flags)
204
panic("softdep_setup_freeblocks called");
208
softdep_freefile(pvp, ino, mode)
214
panic("softdep_freefile called");
218
softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
223
struct buf *newdirbp;
227
panic("softdep_setup_directory_add called");
231
softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize)
239
panic("softdep_change_directoryentry_offset called");
243
softdep_setup_remove(bp, dp, ip, isrmdir)
250
panic("softdep_setup_remove called");
254
softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
262
panic("softdep_setup_directory_change called");
266
softdep_change_linkcnt(ip)
270
panic("softdep_change_linkcnt called");
274
softdep_load_inodeblock(ip)
278
panic("softdep_load_inodeblock called");
282
softdep_update_inodeblock(ip, bp, waitfor)
288
panic("softdep_update_inodeblock called");
293
struct vnode *vp; /* the "in_core" copy of the inode */
300
softdep_fsync_mountdev(vp)
308
softdep_flushworklist(oldmnt, countp, td)
309
struct mount *oldmnt;
319
softdep_sync_metadata(struct vnode *vp)
330
panic("softdep_slowdown called");
334
softdep_releasefile(ip)
335
struct inode *ip; /* inode with the zero effective link count */
338
panic("softdep_releasefile called");
342
softdep_request_cleanup(fs, vp)
351
softdep_check_suspend(struct mount *mp,
355
int secondary_writes,
356
int secondary_accwrites)
362
(void) softdep_accdeps;
364
ASSERT_VI_LOCKED(devvp, "softdep_check_suspend");
365
bo = &devvp->v_bufobj;
368
if (!MNT_ITRYLOCK(mp)) {
375
if (mp->mnt_secondary_writes != 0) {
377
msleep(&mp->mnt_secondary_writes,
379
(PUSER - 1) | PDROP, "secwr", 0);
387
* Reasons for needing more work before suspend:
388
* - Dirty buffers on devvp.
389
* - Secondary writes occurred after start of vnode sync loop
392
if (bo->bo_numoutput > 0 ||
393
bo->bo_dirty.bv_cnt > 0 ||
394
secondary_writes != 0 ||
395
mp->mnt_secondary_writes != 0 ||
396
secondary_accwrites != mp->mnt_secondary_accwrites)
403
softdep_get_depcounts(struct mount *mp,
405
int *softdepactiveaccp)
409
*softdepactiveaccp = 0;
414
* These definitions need to be adapted to the system to which
415
* this file is being ported.
418
* malloc types defined for the softdep system.
420
static MALLOC_DEFINE(M_PAGEDEP, "pagedep","File page dependencies");
421
static MALLOC_DEFINE(M_INODEDEP, "inodedep","Inode dependencies");
422
static MALLOC_DEFINE(M_NEWBLK, "newblk","New block allocation");
423
static MALLOC_DEFINE(M_BMSAFEMAP, "bmsafemap","Block or frag allocated from cyl group map");
424
static MALLOC_DEFINE(M_ALLOCDIRECT, "allocdirect","Block or frag dependency for an inode");
425
static MALLOC_DEFINE(M_INDIRDEP, "indirdep","Indirect block dependencies");
426
static MALLOC_DEFINE(M_ALLOCINDIR, "allocindir","Block dependency for an indirect block");
427
static MALLOC_DEFINE(M_FREEFRAG, "freefrag","Previously used frag for an inode");
428
static MALLOC_DEFINE(M_FREEBLKS, "freeblks","Blocks freed from an inode");
429
static MALLOC_DEFINE(M_FREEFILE, "freefile","Inode deallocated");
430
static MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry");
431
static MALLOC_DEFINE(M_MKDIR, "mkdir","New directory");
432
static MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted");
433
static MALLOC_DEFINE(M_NEWDIRBLK, "newdirblk","Unclaimed new directory block");
434
static MALLOC_DEFINE(M_SAVEDINO, "savedino","Saved inodes");
436
#define M_SOFTDEP_FLAGS (M_WAITOK | M_USE_RESERVE)
441
#define D_BMSAFEMAP 3
442
#define D_ALLOCDIRECT 4
444
#define D_ALLOCINDIR 6
451
#define D_NEWDIRBLK 13
452
#define D_LAST D_NEWDIRBLK
455
* translate from workitem type to memory type
456
* MUST match the defines above, such that memtype[D_XXX] == M_XXX
458
static struct malloc_type *memtype[] = {
475
#define DtoM(type) (memtype[type])
478
* Names of malloc types.
480
#define TYPENAME(type) \
481
((unsigned)(type) < D_LAST ? memtype[type]->ks_shortdesc : "???")
483
* End system adaptation definitions.
487
* Forward declarations.
489
struct inodedep_hashhead;
490
struct newblk_hashhead;
491
struct pagedep_hashhead;
494
* Internal function prototypes.
496
static void softdep_error(char *, int);
497
static void drain_output(struct vnode *);
498
static struct buf *getdirtybuf(struct buf *, struct mtx *, int);
499
static void clear_remove(struct thread *);
500
static void clear_inodedeps(struct thread *);
501
static int flush_pagedep_deps(struct vnode *, struct mount *,
503
static int flush_inodedep_deps(struct mount *, ino_t);
504
static int flush_deplist(struct allocdirectlst *, int, int *);
505
static int handle_written_filepage(struct pagedep *, struct buf *);
506
static void diradd_inode_written(struct diradd *, struct inodedep *);
507
static int handle_written_inodeblock(struct inodedep *, struct buf *);
508
static void handle_allocdirect_partdone(struct allocdirect *);
509
static void handle_allocindir_partdone(struct allocindir *);
510
static void initiate_write_filepage(struct pagedep *, struct buf *);
511
static void handle_written_mkdir(struct mkdir *, int);
512
static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
513
static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
514
static void handle_workitem_freefile(struct freefile *);
515
static void handle_workitem_remove(struct dirrem *, struct vnode *);
516
static struct dirrem *newdirrem(struct buf *, struct inode *,
517
struct inode *, int, struct dirrem **);
518
static void free_diradd(struct diradd *);
519
static void free_allocindir(struct allocindir *, struct inodedep *);
520
static void free_newdirblk(struct newdirblk *);
521
static int indir_trunc(struct freeblks *, ufs2_daddr_t, int, ufs_lbn_t,
523
static void deallocate_dependencies(struct buf *, struct inodedep *);
524
static void free_allocdirect(struct allocdirectlst *,
525
struct allocdirect *, int);
526
static int check_inode_unwritten(struct inodedep *);
527
static int free_inodedep(struct inodedep *);
528
static void handle_workitem_freeblocks(struct freeblks *, int);
529
static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
530
static void setup_allocindir_phase2(struct buf *, struct inode *,
531
struct allocindir *);
532
static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t,
534
static void handle_workitem_freefrag(struct freefrag *);
535
static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long);
536
static void allocdirect_merge(struct allocdirectlst *,
537
struct allocdirect *, struct allocdirect *);
538
static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *);
539
static int newblk_find(struct newblk_hashhead *, struct fs *, ufs2_daddr_t,
541
static int newblk_lookup(struct fs *, ufs2_daddr_t, int, struct newblk **);
542
static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t,
544
static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
545
static int pagedep_lookup(struct inode *, ufs_lbn_t, int, struct pagedep **);
546
static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
547
struct mount *mp, int, struct pagedep **);
548
static void pause_timer(void *);
549
static int request_cleanup(struct mount *, int);
550
static int process_worklist_item(struct mount *, int);
551
static void add_to_worklist(struct worklist *);
552
static void softdep_flush(void);
553
static int softdep_speedup(void);
556
* Exported softdep operations.
558
static void softdep_disk_io_initiation(struct buf *);
559
static void softdep_disk_write_complete(struct buf *);
560
static void softdep_deallocate_dependencies(struct buf *);
561
static int softdep_count_dependencies(struct buf *bp, int);
563
static struct mtx lk;
564
MTX_SYSINIT(softdep_lock, &lk, "Softdep Lock", MTX_DEF);
566
#define TRY_ACQUIRE_LOCK(lk) mtx_trylock(lk)
567
#define ACQUIRE_LOCK(lk) mtx_lock(lk)
568
#define FREE_LOCK(lk) mtx_unlock(lk)
571
* Worklist queue management.
572
* These routines require that the lock be held.
574
#ifndef /* NOT */ DEBUG
575
#define WORKLIST_INSERT(head, item) do { \
576
(item)->wk_state |= ONWORKLIST; \
577
LIST_INSERT_HEAD(head, item, wk_list); \
579
#define WORKLIST_REMOVE(item) do { \
580
(item)->wk_state &= ~ONWORKLIST; \
581
LIST_REMOVE(item, wk_list); \
584
static void worklist_insert(struct workhead *, struct worklist *);
585
static void worklist_remove(struct worklist *);
587
#define WORKLIST_INSERT(head, item) worklist_insert(head, item)
588
#define WORKLIST_REMOVE(item) worklist_remove(item)
591
worklist_insert(head, item)
592
struct workhead *head;
593
struct worklist *item;
596
mtx_assert(&lk, MA_OWNED);
597
if (item->wk_state & ONWORKLIST)
598
panic("worklist_insert: already on list");
599
item->wk_state |= ONWORKLIST;
600
LIST_INSERT_HEAD(head, item, wk_list);
604
worklist_remove(item)
605
struct worklist *item;
608
mtx_assert(&lk, MA_OWNED);
609
if ((item->wk_state & ONWORKLIST) == 0)
610
panic("worklist_remove: not on list");
611
item->wk_state &= ~ONWORKLIST;
612
LIST_REMOVE(item, wk_list);
617
* Routines for tracking and managing workitems.
619
static void workitem_free(struct worklist *, int);
620
static void workitem_alloc(struct worklist *, int, struct mount *);
622
#define WORKITEM_FREE(item, type) workitem_free((struct worklist *)(item), (type))
625
workitem_free(item, type)
626
struct worklist *item;
629
struct ufsmount *ump;
630
mtx_assert(&lk, MA_OWNED);
633
if (item->wk_state & ONWORKLIST)
634
panic("workitem_free: still on list");
635
if (item->wk_type != type)
636
panic("workitem_free: type mismatch");
638
ump = VFSTOUFS(item->wk_mp);
639
if (--ump->softdep_deps == 0 && ump->softdep_req)
640
wakeup(&ump->softdep_deps);
641
FREE(item, DtoM(type));
645
workitem_alloc(item, type, mp)
646
struct worklist *item;
650
item->wk_type = type;
654
VFSTOUFS(mp)->softdep_deps++;
655
VFSTOUFS(mp)->softdep_accdeps++;
660
* Workitem queue management
662
static int max_softdeps; /* maximum number of structs before slowdown */
663
static int maxindirdeps = 50; /* max number of indirdeps before slowdown */
664
static int tickdelay = 2; /* number of ticks to pause during slowdown */
665
static int proc_waiting; /* tracks whether we have a timeout posted */
666
static int *stat_countp; /* statistic to count in proc_waiting timeout */
667
static struct callout softdep_callout;
668
static int req_pending;
669
static int req_clear_inodedeps; /* syncer process flush some inodedeps */
670
#define FLUSH_INODES 1
671
static int req_clear_remove; /* syncer process flush some freeblks */
672
#define FLUSH_REMOVE 2
673
#define FLUSH_REMOVE_WAIT 3
674
static long num_freeblkdep; /* number of freeblks workitems allocated */
679
static int stat_worklist_push; /* number of worklist cleanups */
680
static int stat_blk_limit_push; /* number of times block limit neared */
681
static int stat_ino_limit_push; /* number of times inode limit neared */
682
static int stat_blk_limit_hit; /* number of times block slowdown imposed */
683
static int stat_ino_limit_hit; /* number of times inode slowdown imposed */
684
static int stat_sync_limit_hit; /* number of synchronous slowdowns imposed */
685
static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */
686
static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */
687
static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
688
static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
690
SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, "");
691
SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, "");
692
SYSCTL_INT(_debug, OID_AUTO, maxindirdeps, CTLFLAG_RW, &maxindirdeps, 0, "");
693
SYSCTL_INT(_debug, OID_AUTO, worklist_push, CTLFLAG_RW, &stat_worklist_push, 0,"");
694
SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,"");
695
SYSCTL_INT(_debug, OID_AUTO, ino_limit_push, CTLFLAG_RW, &stat_ino_limit_push, 0,"");
696
SYSCTL_INT(_debug, OID_AUTO, blk_limit_hit, CTLFLAG_RW, &stat_blk_limit_hit, 0, "");
697
SYSCTL_INT(_debug, OID_AUTO, ino_limit_hit, CTLFLAG_RW, &stat_ino_limit_hit, 0, "");
698
SYSCTL_INT(_debug, OID_AUTO, sync_limit_hit, CTLFLAG_RW, &stat_sync_limit_hit, 0, "");
699
SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0, "");
700
SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, "");
701
SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, "");
702
SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, "");
703
/* SYSCTL_INT(_debug, OID_AUTO, worklist_num, CTLFLAG_RD, &softdep_on_worklist, 0, ""); */
705
SYSCTL_DECL(_vfs_ffs);
707
static int compute_summary_at_mount = 0; /* Whether to recompute the summary at mount time */
708
SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW,
709
&compute_summary_at_mount, 0, "Recompute summary at mount");
711
static struct proc *softdepproc;
712
static struct kproc_desc softdep_kp = {
717
SYSINIT(sdproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
725
struct ufsmount *ump;
731
td->td_pflags |= TDP_NORUNNINGBUF;
734
kthread_suspend_check(softdepproc);
735
vfslocked = VFS_LOCK_GIANT((struct mount *)NULL);
738
* If requested, try removing inode or removal dependencies.
740
if (req_clear_inodedeps) {
742
req_clear_inodedeps -= 1;
743
wakeup_one(&proc_waiting);
745
if (req_clear_remove) {
747
req_clear_remove -= 1;
748
wakeup_one(&proc_waiting);
751
VFS_UNLOCK_GIANT(vfslocked);
753
mtx_lock(&mountlist_mtx);
754
for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
755
nmp = TAILQ_NEXT(mp, mnt_list);
756
if ((mp->mnt_flag & MNT_SOFTDEP) == 0)
758
if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td))
760
vfslocked = VFS_LOCK_GIANT(mp);
761
softdep_process_worklist(mp, 0);
763
remaining += ump->softdep_on_worklist -
764
ump->softdep_on_worklist_inprogress;
765
VFS_UNLOCK_GIANT(vfslocked);
766
mtx_lock(&mountlist_mtx);
767
nmp = TAILQ_NEXT(mp, mnt_list);
770
mtx_unlock(&mountlist_mtx);
775
msleep(&req_pending, &lk, PVM, "sdflush", hz);
782
softdep_speedup(void)
785
mtx_assert(&lk, MA_OWNED);
786
if (req_pending == 0) {
788
wakeup(&req_pending);
791
return speedup_syncer();
795
* Add an item to the end of the work queue.
796
* This routine requires that the lock be held.
797
* This is the only routine that adds items to the list.
798
* The following routine is the only one that removes items
799
* and does so in order from first to last.
805
struct ufsmount *ump;
807
mtx_assert(&lk, MA_OWNED);
808
ump = VFSTOUFS(wk->wk_mp);
809
if (wk->wk_state & ONWORKLIST)
810
panic("add_to_worklist: already on list");
811
wk->wk_state |= ONWORKLIST;
812
if (LIST_EMPTY(&ump->softdep_workitem_pending))
813
LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list);
815
LIST_INSERT_AFTER(ump->softdep_worklist_tail, wk, wk_list);
816
ump->softdep_worklist_tail = wk;
817
ump->softdep_on_worklist += 1;
821
* Process that runs once per second to handle items in the background queue.
823
* Note that we ensure that everything is done in the order in which they
824
* appear in the queue. The code below depends on this property to ensure
825
* that blocks of a file are freed before the inode itself is freed. This
826
* ordering ensures that no new <vfsid, inum, lbn> triples will be generated
827
* until all the old ones have been purged from the dependency lists.
830
softdep_process_worklist(mp, full)
834
struct thread *td = curthread;
835
int cnt, matchcnt, loopcount;
836
struct ufsmount *ump;
839
KASSERT(mp != NULL, ("softdep_process_worklist: NULL mp"));
841
* Record the process identifier of our caller so that we can give
842
* this process preferential treatment in request_cleanup below.
848
starttime = time_second;
849
while (ump->softdep_on_worklist > 0) {
850
if ((cnt = process_worklist_item(mp, 0)) == -1)
855
* If requested, try removing inode or removal dependencies.
857
if (req_clear_inodedeps) {
859
req_clear_inodedeps -= 1;
860
wakeup_one(&proc_waiting);
862
if (req_clear_remove) {
864
req_clear_remove -= 1;
865
wakeup_one(&proc_waiting);
868
* We do not generally want to stop for buffer space, but if
869
* we are really being a buffer hog, we will stop and wait.
871
if (loopcount++ % 128 == 0) {
878
* Never allow processing to run for more than one
879
* second. Otherwise the other mountpoints may get
880
* excessively backlogged.
882
if (!full && starttime != time_second) {
892
* Process one item on the worklist.
895
process_worklist_item(mp, flags)
899
struct worklist *wk, *wkend;
900
struct ufsmount *ump;
904
mtx_assert(&lk, MA_OWNED);
905
KASSERT(mp != NULL, ("process_worklist_item: NULL mp"));
907
* If we are being called because of a process doing a
908
* copy-on-write, then it is not safe to write as we may
909
* recurse into the copy-on-write routine.
911
if (curthread->td_pflags & TDP_COWINPROGRESS)
914
* Normally we just process each item on the worklist in order.
915
* However, if we are in a situation where we cannot lock any
916
* inodes, we have to skip over any dirrem requests whose
917
* vnodes are resident and locked.
921
LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) {
922
if (wk->wk_state & INPROGRESS)
924
if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
926
wk->wk_state |= INPROGRESS;
927
ump->softdep_on_worklist_inprogress++;
929
ffs_vgetf(mp, WK_DIRREM(wk)->dm_oldinum,
930
LK_NOWAIT | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ);
932
wk->wk_state &= ~INPROGRESS;
933
ump->softdep_on_worklist_inprogress--;
940
* Remove the item to be processed. If we are removing the last
941
* item on the list, we need to recalculate the tail pointer.
942
* As this happens rarely and usually when the list is short,
943
* we just run down the list to find it rather than tracking it
947
if (wk == ump->softdep_worklist_tail) {
948
LIST_FOREACH(wkend, &ump->softdep_workitem_pending, wk_list)
949
if (LIST_NEXT(wkend, wk_list) == NULL)
951
ump->softdep_worklist_tail = wkend;
953
ump->softdep_on_worklist -= 1;
955
if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
956
panic("process_worklist_item: suspended filesystem");
958
switch (wk->wk_type) {
961
/* removal of a directory entry */
962
handle_workitem_remove(WK_DIRREM(wk), vp);
966
/* releasing blocks and/or fragments from a file */
967
handle_workitem_freeblocks(WK_FREEBLKS(wk), flags & LK_NOWAIT);
971
/* releasing a fragment when replaced as a file grows */
972
handle_workitem_freefrag(WK_FREEFRAG(wk));
976
/* releasing an inode when its link count drops to 0 */
977
handle_workitem_freefile(WK_FREEFILE(wk));
981
panic("%s_process_worklist: Unknown type %s",
982
"softdep", TYPENAME(wk->wk_type));
985
vn_finished_secondary_write(mp);
991
* Move dependencies from one buffer to another.
994
softdep_move_dependencies(oldbp, newbp)
998
struct worklist *wk, *wktail;
1000
if (!LIST_EMPTY(&newbp->b_dep))
1001
panic("softdep_move_dependencies: need merge code");
1004
while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
1005
LIST_REMOVE(wk, wk_list);
1007
LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
1009
LIST_INSERT_AFTER(wktail, wk, wk_list);
1016
* Purge the work list of all items associated with a particular mount point.
1019
softdep_flushworklist(oldmnt, countp, td)
1020
struct mount *oldmnt;
1024
struct vnode *devvp;
1025
int count, error = 0;
1026
struct ufsmount *ump;
1029
* Alternately flush the block device associated with the mount
1030
* point and process any dependencies that the flushing
1031
* creates. We continue until no more worklist dependencies
1035
ump = VFSTOUFS(oldmnt);
1036
devvp = ump->um_devvp;
1037
while ((count = softdep_process_worklist(oldmnt, 1)) > 0) {
1039
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
1040
error = VOP_FSYNC(devvp, MNT_WAIT, td);
1041
VOP_UNLOCK(devvp, 0, td);
1049
softdep_waitidle(struct mount *mp)
1051
struct ufsmount *ump;
1057
for (i = 0; i < 10 && ump->softdep_deps; i++) {
1058
ump->softdep_req = 1;
1059
if (ump->softdep_on_worklist)
1060
panic("softdep_waitidle: work added after flush.");
1061
msleep(&ump->softdep_deps, &lk, PVM, "softdeps", 1);
1063
ump->softdep_req = 0;
1068
printf("softdep_waitidle: Failed to flush worklist for %p\n",
1076
* Flush all vnodes and worklist items associated with a specified mount point.
1079
softdep_flushfiles(oldmnt, flags, td)
1080
struct mount *oldmnt;
1084
int error, depcount, loopcnt, retry_flush_count, retry;
1087
retry_flush_count = 3;
1092
* Alternately flush the vnodes associated with the mount
1093
* point and process any dependencies that the flushing
1094
* creates. In theory, this loop can happen at most twice,
1095
* but we give it a few extra just to be sure.
1097
for (; loopcnt > 0; loopcnt--) {
1099
* Do another flush in case any vnodes were brought in
1100
* as part of the cleanup operations.
1102
if ((error = ffs_flushfiles(oldmnt, flags, td)) != 0)
1104
if ((error = softdep_flushworklist(oldmnt, &depcount, td)) != 0 ||
1109
* If we are unmounting then it is an error to fail. If we
1110
* are simply trying to downgrade to read-only, then filesystem
1111
* activity can keep us busy forever, so we just fail with EBUSY.
1114
if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT)
1115
panic("softdep_flushfiles: looping");
1119
error = softdep_waitidle(oldmnt);
1121
if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) {
1124
KASSERT((oldmnt->mnt_kern_flag & MNTK_NOINSMNTQ) != 0,
1125
("softdep_flushfiles: !MNTK_NOINSMNTQ"));
1126
if (oldmnt->mnt_nvnodelistsize > 0) {
1127
if (--retry_flush_count > 0) {
1133
MNT_IUNLOCK(oldmnt);
1142
* Structure hashing.
1144
* There are three types of structures that can be looked up:
1145
* 1) pagedep structures identified by mount point, inode number,
1146
* and logical block.
1147
* 2) inodedep structures identified by mount point and inode number.
1148
* 3) newblk structures identified by mount point and
1149
* physical block number.
1151
* The "pagedep" and "inodedep" dependency structures are hashed
1152
* separately from the file blocks and inodes to which they correspond.
1153
* This separation helps when the in-memory copy of an inode or
1154
* file block must be replaced. It also obviates the need to access
1155
* an inode or file page when simply updating (or de-allocating)
1156
* dependency structures. Lookup of newblk structures is needed to
1157
* find newly allocated blocks when trying to associate them with
1158
* their allocdirect or allocindir structure.
1160
* The lookup routines optionally create and hash a new instance when
1161
* an existing entry is not found.
1163
#define DEPALLOC 0x0001 /* allocate structure if lookup fails */
1164
#define NODELAY 0x0002 /* cannot do background work */
1167
* Structures and routines associated with pagedep caching.
1169
LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
1170
u_long pagedep_hash; /* size of hash table - 1 */
1171
#define PAGEDEP_HASH(mp, inum, lbn) \
1172
(&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \
1176
pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp)
1177
struct pagedep_hashhead *pagedephd;
1182
struct pagedep **pagedeppp;
1184
struct pagedep *pagedep;
1186
LIST_FOREACH(pagedep, pagedephd, pd_hash)
1187
if (ino == pagedep->pd_ino &&
1188
lbn == pagedep->pd_lbn &&
1189
mp == pagedep->pd_list.wk_mp)
1192
*pagedeppp = pagedep;
1193
if ((flags & DEPALLOC) != 0 &&
1194
(pagedep->pd_state & ONWORKLIST) == 0)
1202
* Look up a pagedep. Return 1 if found, 0 if not found or found
1203
* when asked to allocate but not associated with any buffer.
1204
* If not found, allocate if DEPALLOC flag is passed.
1205
* Found or allocated entry is returned in pagedeppp.
1206
* This routine must be called with splbio interrupts blocked.
1209
pagedep_lookup(ip, lbn, flags, pagedeppp)
1213
struct pagedep **pagedeppp;
1215
struct pagedep *pagedep;
1216
struct pagedep_hashhead *pagedephd;
1221
mtx_assert(&lk, MA_OWNED);
1222
mp = ITOV(ip)->v_mount;
1223
pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn);
1225
ret = pagedep_find(pagedephd, ip->i_number, lbn, mp, flags, pagedeppp);
1226
if (*pagedeppp || (flags & DEPALLOC) == 0)
1229
MALLOC(pagedep, struct pagedep *, sizeof(struct pagedep),
1230
M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO);
1231
workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp);
1233
ret = pagedep_find(pagedephd, ip->i_number, lbn, mp, flags, pagedeppp);
1235
WORKITEM_FREE(pagedep, D_PAGEDEP);
1238
pagedep->pd_ino = ip->i_number;
1239
pagedep->pd_lbn = lbn;
1240
LIST_INIT(&pagedep->pd_dirremhd);
1241
LIST_INIT(&pagedep->pd_pendinghd);
1242
for (i = 0; i < DAHASHSZ; i++)
1243
LIST_INIT(&pagedep->pd_diraddhd[i]);
1244
LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
1245
*pagedeppp = pagedep;
1250
* Structures and routines associated with inodedep caching.
1252
LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
1253
static u_long inodedep_hash; /* size of hash table - 1 */
1254
static long num_inodedep; /* number of inodedep allocated */
1255
#define INODEDEP_HASH(fs, inum) \
1256
(&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash])
1259
inodedep_find(inodedephd, fs, inum, inodedeppp)
1260
struct inodedep_hashhead *inodedephd;
1263
struct inodedep **inodedeppp;
1265
struct inodedep *inodedep;
1267
LIST_FOREACH(inodedep, inodedephd, id_hash)
1268
if (inum == inodedep->id_ino && fs == inodedep->id_fs)
1271
*inodedeppp = inodedep;
1279
* Look up an inodedep. Return 1 if found, 0 if not found.
1280
* If not found, allocate if DEPALLOC flag is passed.
1281
* Found or allocated entry is returned in inodedeppp.
1282
* This routine must be called with splbio interrupts blocked.
1285
inodedep_lookup(mp, inum, flags, inodedeppp)
1289
struct inodedep **inodedeppp;
1291
struct inodedep *inodedep;
1292
struct inodedep_hashhead *inodedephd;
1295
mtx_assert(&lk, MA_OWNED);
1296
fs = VFSTOUFS(mp)->um_fs;
1297
inodedephd = INODEDEP_HASH(fs, inum);
1299
if (inodedep_find(inodedephd, fs, inum, inodedeppp))
1301
if ((flags & DEPALLOC) == 0)
1304
* If we are over our limit, try to improve the situation.
1306
if (num_inodedep > max_softdeps && (flags & NODELAY) == 0)
1307
request_cleanup(mp, FLUSH_INODES);
1309
MALLOC(inodedep, struct inodedep *, sizeof(struct inodedep),
1310
M_INODEDEP, M_SOFTDEP_FLAGS);
1311
workitem_alloc(&inodedep->id_list, D_INODEDEP, mp);
1313
if (inodedep_find(inodedephd, fs, inum, inodedeppp)) {
1314
WORKITEM_FREE(inodedep, D_INODEDEP);
1318
inodedep->id_fs = fs;
1319
inodedep->id_ino = inum;
1320
inodedep->id_state = ALLCOMPLETE;
1321
inodedep->id_nlinkdelta = 0;
1322
inodedep->id_savedino1 = NULL;
1323
inodedep->id_savedsize = -1;
1324
inodedep->id_savedextsize = -1;
1325
inodedep->id_buf = NULL;
1326
LIST_INIT(&inodedep->id_pendinghd);
1327
LIST_INIT(&inodedep->id_inowait);
1328
LIST_INIT(&inodedep->id_bufwait);
1329
TAILQ_INIT(&inodedep->id_inoupdt);
1330
TAILQ_INIT(&inodedep->id_newinoupdt);
1331
TAILQ_INIT(&inodedep->id_extupdt);
1332
TAILQ_INIT(&inodedep->id_newextupdt);
1333
LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
1334
*inodedeppp = inodedep;
1339
* Structures and routines associated with newblk caching.
1341
LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
1342
u_long newblk_hash; /* size of hash table - 1 */
1343
#define NEWBLK_HASH(fs, inum) \
1344
(&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash])
1347
newblk_find(newblkhd, fs, newblkno, newblkpp)
1348
struct newblk_hashhead *newblkhd;
1350
ufs2_daddr_t newblkno;
1351
struct newblk **newblkpp;
1353
struct newblk *newblk;
1355
LIST_FOREACH(newblk, newblkhd, nb_hash)
1356
if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs)
1367
* Look up a newblk. Return 1 if found, 0 if not found.
1368
* If not found, allocate if DEPALLOC flag is passed.
1369
* Found or allocated entry is returned in newblkpp.
1372
newblk_lookup(fs, newblkno, flags, newblkpp)
1374
ufs2_daddr_t newblkno;
1376
struct newblk **newblkpp;
1378
struct newblk *newblk;
1379
struct newblk_hashhead *newblkhd;
1381
newblkhd = NEWBLK_HASH(fs, newblkno);
1382
if (newblk_find(newblkhd, fs, newblkno, newblkpp))
1384
if ((flags & DEPALLOC) == 0)
1387
MALLOC(newblk, struct newblk *, sizeof(struct newblk),
1388
M_NEWBLK, M_SOFTDEP_FLAGS);
1390
if (newblk_find(newblkhd, fs, newblkno, newblkpp)) {
1391
FREE(newblk, M_NEWBLK);
1394
newblk->nb_state = 0;
1396
newblk->nb_newblkno = newblkno;
1397
LIST_INSERT_HEAD(newblkhd, newblk, nb_hash);
1403
* Executed during filesystem system initialization before
1404
* mounting any filesystems.
1407
softdep_initialize()
1410
LIST_INIT(&mkdirlisthd);
1411
max_softdeps = desiredvnodes * 4;
1412
pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP,
1414
inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash);
1415
newblk_hashtbl = hashinit(64, M_NEWBLK, &newblk_hash);
1417
/* initialise bioops hack */
1418
bioops.io_start = softdep_disk_io_initiation;
1419
bioops.io_complete = softdep_disk_write_complete;
1420
bioops.io_deallocate = softdep_deallocate_dependencies;
1421
bioops.io_countdeps = softdep_count_dependencies;
1423
/* Initialize the callout with an mtx. */
1424
callout_init_mtx(&softdep_callout, &lk, 0);
1428
* Executed after all filesystems have been unmounted during
1429
* filesystem module unload.
1432
softdep_uninitialize()
1435
callout_drain(&softdep_callout);
1436
hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash);
1437
hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash);
1438
hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash);
1442
* Called at mount time to notify the dependency code that a
1443
* filesystem wishes to use it.
1446
softdep_mount(devvp, mp, fs, cred)
1447
struct vnode *devvp;
1452
struct csum_total cstotal;
1453
struct ufsmount *ump;
1459
mp->mnt_flag = (mp->mnt_flag & ~MNT_ASYNC) | MNT_SOFTDEP;
1460
if ((mp->mnt_kern_flag & MNTK_SOFTDEP) == 0) {
1461
mp->mnt_kern_flag = (mp->mnt_kern_flag & ~MNTK_ASYNC) |
1467
LIST_INIT(&ump->softdep_workitem_pending);
1468
ump->softdep_worklist_tail = NULL;
1469
ump->softdep_on_worklist = 0;
1470
ump->softdep_deps = 0;
1472
* When doing soft updates, the counters in the
1473
* superblock may have gotten out of sync. Recomputation
1474
* can take a long time and can be deferred for background
1475
* fsck. However, the old behavior of scanning the cylinder
1476
* groups and recalculating them at mount time is available
1477
* by setting vfs.ffs.compute_summary_at_mount to one.
1479
if (compute_summary_at_mount == 0 || fs->fs_clean != 0)
1481
bzero(&cstotal, sizeof cstotal);
1482
for (cyl = 0; cyl < fs->fs_ncg; cyl++) {
1483
if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
1484
fs->fs_cgsize, cred, &bp)) != 0) {
1488
cgp = (struct cg *)bp->b_data;
1489
cstotal.cs_nffree += cgp->cg_cs.cs_nffree;
1490
cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree;
1491
cstotal.cs_nifree += cgp->cg_cs.cs_nifree;
1492
cstotal.cs_ndir += cgp->cg_cs.cs_ndir;
1493
fs->fs_cs(fs, cyl) = cgp->cg_cs;
1497
if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal))
1498
printf("%s: superblock summary recomputed\n", fs->fs_fsmnt);
1500
bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal);
/*
 * Protecting the freemaps (or bitmaps).
 *
 * To eliminate the need to execute fsck before mounting a filesystem
 * after a power failure, one must (conservatively) guarantee that the
 * on-disk copy of the bitmaps never indicate that a live inode or block is
 * free.  So, when a block or inode is allocated, the bitmap should be
 * updated (on disk) before any new pointers.  When a block or inode is
 * freed, the bitmap should not be updated until all pointers have been
 * reset.  The latter dependency is handled by the delayed de-allocation
 * approach described below for block and inode de-allocation.  The former
 * dependency is handled by calling the following procedure when a block or
 * inode is allocated. When an inode is allocated an "inodedep" is created
 * with its DEPCOMPLETE flag cleared until its bitmap is written to disk.
 * Each "inodedep" is also inserted into the hash indexing structure so
 * that any additional link additions can be made dependent on the inode
 * allocation.
 *
 * The ufs filesystem maintains a number of free block counts (e.g., per
 * cylinder group, per cylinder and per <cylinder, rotational position> pair)
 * in addition to the bitmaps.  These counts are used to improve efficiency
 * during allocation and therefore must be consistent with the bitmaps.
 * There is no convenient way to guarantee post-crash consistency of these
 * counts with simple update ordering, for two main reasons: (1) The counts
 * and bitmaps for a single cylinder group block are not in the same disk
 * sector.  If a disk write is interrupted (e.g., by power failure), one may
 * be written and the other not.  (2) Some of the counts are located in the
 * superblock rather than the cylinder group block. So, we focus our soft
 * updates implementation on protecting the bitmaps.  When mounting a
 * filesystem, we recompute the auxiliary counts from the bitmaps.
 */
1537
* Called just after updating the cylinder group block to allocate an inode.
1540
softdep_setup_inomapdep(bp, ip, newinum)
1541
struct buf *bp; /* buffer for cylgroup block with inode map */
1542
struct inode *ip; /* inode related to allocation */
1543
ino_t newinum; /* new inode number being allocated */
1545
struct inodedep *inodedep;
1546
struct bmsafemap *bmsafemap;
1549
* Create a dependency for the newly allocated inode.
1550
* Panic if it already exists as something is seriously wrong.
1551
* Otherwise add it to the dependency list for the buffer holding
1552
* the cylinder group map from which it was allocated.
1555
if ((inodedep_lookup(UFSTOVFS(ip->i_ump), newinum, DEPALLOC|NODELAY,
1557
panic("softdep_setup_inomapdep: dependency for new inode "
1559
inodedep->id_buf = bp;
1560
inodedep->id_state &= ~DEPCOMPLETE;
1561
bmsafemap = bmsafemap_lookup(inodedep->id_list.wk_mp, bp);
1562
LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
1567
* Called just after updating the cylinder group block to
1568
* allocate block or fragment.
1571
softdep_setup_blkmapdep(bp, mp, newblkno)
1572
struct buf *bp; /* buffer for cylgroup block with block map */
1573
struct mount *mp; /* filesystem doing allocation */
1574
ufs2_daddr_t newblkno; /* number of newly allocated block */
1576
struct newblk *newblk;
1577
struct bmsafemap *bmsafemap;
1580
fs = VFSTOUFS(mp)->um_fs;
1582
* Create a dependency for the newly allocated block.
1583
* Add it to the dependency list for the buffer holding
1584
* the cylinder group map from which it was allocated.
1587
if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0)
1588
panic("softdep_setup_blkmapdep: found block");
1589
newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp);
1590
LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps);
1595
* Find the bmsafemap associated with a cylinder group buffer.
1596
* If none exists, create one. The buffer must be locked when
1597
* this routine is called and this routine must be called with
1598
* splbio interrupts blocked.
1600
static struct bmsafemap *
1601
bmsafemap_lookup(mp, bp)
1605
struct bmsafemap *bmsafemap;
1606
struct worklist *wk;
1608
mtx_assert(&lk, MA_OWNED);
1609
LIST_FOREACH(wk, &bp->b_dep, wk_list)
1610
if (wk->wk_type == D_BMSAFEMAP)
1611
return (WK_BMSAFEMAP(wk));
1613
MALLOC(bmsafemap, struct bmsafemap *, sizeof(struct bmsafemap),
1614
M_BMSAFEMAP, M_SOFTDEP_FLAGS);
1615
workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp);
1616
bmsafemap->sm_buf = bp;
1617
LIST_INIT(&bmsafemap->sm_allocdirecthd);
1618
LIST_INIT(&bmsafemap->sm_allocindirhd);
1619
LIST_INIT(&bmsafemap->sm_inodedephd);
1620
LIST_INIT(&bmsafemap->sm_newblkhd);
1622
WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
/*
 * Direct block allocation dependencies.
 *
 * When a new block is allocated, the corresponding disk locations must be
 * initialized (with zeros or new data) before the on-disk inode points to
 * them.  Also, the freemap from which the block was allocated must be
 * updated (on disk) before the inode's pointer. These two dependencies are
 * independent of each other and are needed for all file blocks and indirect
 * blocks that are pointed to directly by the inode.  Just before the
 * "in-core" version of the inode is updated with a newly allocated block
 * number, a procedure (below) is called to setup allocation dependency
 * structures.  These structures are removed when the corresponding
 * dependencies are satisfied or when the block allocation becomes obsolete
 * (i.e., the file is deleted, the block is de-allocated, or the block is a
 * fragment that gets upgraded).  All of these cases are handled in
 * procedures described later.
 *
 * When a file extension causes a fragment to be upgraded, either to a larger
 * fragment or to a full block, the on-disk location may change (if the
 * previous fragment could not simply be extended). In this case, the old
 * fragment must be de-allocated, but not until after the inode's pointer has
 * been updated. In most cases, this is handled by later procedures, which
 * will construct a "freefrag" structure to be added to the workitem queue
 * when the inode update is complete (or obsolete).  The main exception to
 * this is when an allocation occurs while a pending allocation dependency
 * (for the same block pointer) remains.  This case is handled in the main
 * allocation dependency setup procedure by immediately freeing the
 * unreferenced fragments.
 */
1656
softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
1657
struct inode *ip; /* inode to which block is being added */
1658
ufs_lbn_t lbn; /* block pointer within inode */
1659
ufs2_daddr_t newblkno; /* disk block number being added */
1660
ufs2_daddr_t oldblkno; /* previous block number, 0 unless frag */
1661
long newsize; /* size of new block */
1662
long oldsize; /* size of new block */
1663
struct buf *bp; /* bp for allocated block */
1665
struct allocdirect *adp, *oldadp;
1666
struct allocdirectlst *adphead;
1667
struct bmsafemap *bmsafemap;
1668
struct inodedep *inodedep;
1669
struct pagedep *pagedep;
1670
struct newblk *newblk;
1673
mp = UFSTOVFS(ip->i_ump);
1674
MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect),
1675
M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO);
1676
workitem_alloc(&adp->ad_list, D_ALLOCDIRECT, mp);
1678
adp->ad_newblkno = newblkno;
1679
adp->ad_oldblkno = oldblkno;
1680
adp->ad_newsize = newsize;
1681
adp->ad_oldsize = oldsize;
1682
adp->ad_state = ATTACHED;
1683
LIST_INIT(&adp->ad_newdirblk);
1684
if (newblkno == oldblkno)
1685
adp->ad_freefrag = NULL;
1687
adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
1690
if (lbn >= NDADDR) {
1691
/* allocating an indirect block */
1693
panic("softdep_setup_allocdirect: non-zero indir");
1696
* Allocating a direct block.
1698
* If we are allocating a directory block, then we must
1699
* allocate an associated pagedep to track additions and
1702
if ((ip->i_mode & IFMT) == IFDIR &&
1703
pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
1704
WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
1706
if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
1707
panic("softdep_setup_allocdirect: lost block");
1708
if (newblk->nb_state == DEPCOMPLETE) {
1709
adp->ad_state |= DEPCOMPLETE;
1712
bmsafemap = newblk->nb_bmsafemap;
1713
adp->ad_buf = bmsafemap->sm_buf;
1714
LIST_REMOVE(newblk, nb_deps);
1715
LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
1717
LIST_REMOVE(newblk, nb_hash);
1718
FREE(newblk, M_NEWBLK);
1720
inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep);
1721
adp->ad_inodedep = inodedep;
1722
WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
1724
* The list of allocdirects must be kept in sorted and ascending
1725
* order so that the rollback routines can quickly determine the
1726
* first uncommitted block (the size of the file stored on disk
1727
* ends at the end of the lowest committed fragment, or if there
1728
* are no fragments, at the end of the highest committed block).
1729
* Since files generally grow, the typical case is that the new
1730
* block is to be added at the end of the list. We speed this
1731
* special case by checking against the last allocdirect in the
1732
* list before laboriously traversing the list looking for the
1735
adphead = &inodedep->id_newinoupdt;
1736
oldadp = TAILQ_LAST(adphead, allocdirectlst);
1737
if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
1738
/* insert at end of list */
1739
TAILQ_INSERT_TAIL(adphead, adp, ad_next);
1740
if (oldadp != NULL && oldadp->ad_lbn == lbn)
1741
allocdirect_merge(adphead, adp, oldadp);
1745
TAILQ_FOREACH(oldadp, adphead, ad_next) {
1746
if (oldadp->ad_lbn >= lbn)
1750
panic("softdep_setup_allocdirect: lost entry");
1751
/* insert in middle of list */
1752
TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
1753
if (oldadp->ad_lbn == lbn)
1754
allocdirect_merge(adphead, adp, oldadp);
1759
* Replace an old allocdirect dependency with a newer one.
1760
* This routine must be called with splbio interrupts blocked.
1763
allocdirect_merge(adphead, newadp, oldadp)
1764
struct allocdirectlst *adphead; /* head of list holding allocdirects */
1765
struct allocdirect *newadp; /* allocdirect being added */
1766
struct allocdirect *oldadp; /* existing allocdirect being checked */
1768
struct worklist *wk;
1769
struct freefrag *freefrag;
1770
struct newdirblk *newdirblk;
1772
mtx_assert(&lk, MA_OWNED);
1773
if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
1774
newadp->ad_oldsize != oldadp->ad_newsize ||
1775
newadp->ad_lbn >= NDADDR)
1776
panic("%s %jd != new %jd || old size %ld != new %ld",
1777
"allocdirect_merge: old blkno",
1778
(intmax_t)newadp->ad_oldblkno,
1779
(intmax_t)oldadp->ad_newblkno,
1780
newadp->ad_oldsize, oldadp->ad_newsize);
1781
newadp->ad_oldblkno = oldadp->ad_oldblkno;
1782
newadp->ad_oldsize = oldadp->ad_oldsize;
1784
* If the old dependency had a fragment to free or had never
1785
* previously had a block allocated, then the new dependency
1786
* can immediately post its freefrag and adopt the old freefrag.
1787
* This action is done by swapping the freefrag dependencies.
1788
* The new dependency gains the old one's freefrag, and the
1789
* old one gets the new one and then immediately puts it on
1790
* the worklist when it is freed by free_allocdirect. It is
1791
* not possible to do this swap when the old dependency had a
1792
* non-zero size but no previous fragment to free. This condition
1793
* arises when the new block is an extension of the old block.
1794
* Here, the first part of the fragment allocated to the new
1795
* dependency is part of the block currently claimed on disk by
1796
* the old dependency, so cannot legitimately be freed until the
1797
* conditions for the new dependency are fulfilled.
1799
if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) {
1800
freefrag = newadp->ad_freefrag;
1801
newadp->ad_freefrag = oldadp->ad_freefrag;
1802
oldadp->ad_freefrag = freefrag;
1805
* If we are tracking a new directory-block allocation,
1806
* move it from the old allocdirect to the new allocdirect.
1808
if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) {
1809
newdirblk = WK_NEWDIRBLK(wk);
1810
WORKLIST_REMOVE(&newdirblk->db_list);
1811
if (!LIST_EMPTY(&oldadp->ad_newdirblk))
1812
panic("allocdirect_merge: extra newdirblk");
1813
WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list);
1815
free_allocdirect(adphead, oldadp, 0);
1819
* Allocate a new freefrag structure if needed.
1821
static struct freefrag *
1822
newfreefrag(ip, blkno, size)
1827
struct freefrag *freefrag;
1833
if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
1834
panic("newfreefrag: frag size");
1835
MALLOC(freefrag, struct freefrag *, sizeof(struct freefrag),
1836
M_FREEFRAG, M_SOFTDEP_FLAGS);
1837
workitem_alloc(&freefrag->ff_list, D_FREEFRAG, UFSTOVFS(ip->i_ump));
1838
freefrag->ff_inum = ip->i_number;
1839
freefrag->ff_blkno = blkno;
1840
freefrag->ff_fragsize = size;
1845
* This workitem de-allocates fragments that were replaced during
1846
* file block allocation.
1849
handle_workitem_freefrag(freefrag)
1850
struct freefrag *freefrag;
1852
struct ufsmount *ump = VFSTOUFS(freefrag->ff_list.wk_mp);
1854
ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno,
1855
freefrag->ff_fragsize, freefrag->ff_inum);
1857
WORKITEM_FREE(freefrag, D_FREEFRAG);
1862
* Set up a dependency structure for an external attributes data block.
1863
* This routine follows much of the structure of softdep_setup_allocdirect.
1864
* See the description of softdep_setup_allocdirect above for details.
1867
softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
1870
ufs2_daddr_t newblkno;
1871
ufs2_daddr_t oldblkno;
1876
struct allocdirect *adp, *oldadp;
1877
struct allocdirectlst *adphead;
1878
struct bmsafemap *bmsafemap;
1879
struct inodedep *inodedep;
1880
struct newblk *newblk;
1883
mp = UFSTOVFS(ip->i_ump);
1884
MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect),
1885
M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO);
1886
workitem_alloc(&adp->ad_list, D_ALLOCDIRECT, mp);
1888
adp->ad_newblkno = newblkno;
1889
adp->ad_oldblkno = oldblkno;
1890
adp->ad_newsize = newsize;
1891
adp->ad_oldsize = oldsize;
1892
adp->ad_state = ATTACHED | EXTDATA;
1893
LIST_INIT(&adp->ad_newdirblk);
1894
if (newblkno == oldblkno)
1895
adp->ad_freefrag = NULL;
1897
adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
1900
if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
1901
panic("softdep_setup_allocext: lost block");
1903
inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep);
1904
adp->ad_inodedep = inodedep;
1906
if (newblk->nb_state == DEPCOMPLETE) {
1907
adp->ad_state |= DEPCOMPLETE;
1910
bmsafemap = newblk->nb_bmsafemap;
1911
adp->ad_buf = bmsafemap->sm_buf;
1912
LIST_REMOVE(newblk, nb_deps);
1913
LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
1915
LIST_REMOVE(newblk, nb_hash);
1916
FREE(newblk, M_NEWBLK);
1918
WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
1920
panic("softdep_setup_allocext: lbn %lld > NXADDR",
1923
* The list of allocdirects must be kept in sorted and ascending
1924
* order so that the rollback routines can quickly determine the
1925
* first uncommitted block (the size of the file stored on disk
1926
* ends at the end of the lowest committed fragment, or if there
1927
* are no fragments, at the end of the highest committed block).
1928
* Since files generally grow, the typical case is that the new
1929
* block is to be added at the end of the list. We speed this
1930
* special case by checking against the last allocdirect in the
1931
* list before laboriously traversing the list looking for the
1934
adphead = &inodedep->id_newextupdt;
1935
oldadp = TAILQ_LAST(adphead, allocdirectlst);
1936
if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
1937
/* insert at end of list */
1938
TAILQ_INSERT_TAIL(adphead, adp, ad_next);
1939
if (oldadp != NULL && oldadp->ad_lbn == lbn)
1940
allocdirect_merge(adphead, adp, oldadp);
1944
TAILQ_FOREACH(oldadp, adphead, ad_next) {
1945
if (oldadp->ad_lbn >= lbn)
1949
panic("softdep_setup_allocext: lost entry");
1950
/* insert in middle of list */
1951
TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
1952
if (oldadp->ad_lbn == lbn)
1953
allocdirect_merge(adphead, adp, oldadp);
/*
 * Indirect block allocation dependencies.
 *
 * The same dependencies that exist for a direct block also exist when
 * a new block is allocated and pointed to by an entry in a block of
 * indirect pointers. The undo/redo states described above are also
 * used here. Because an indirect block contains many pointers that
 * may have dependencies, a second copy of the entire in-memory indirect
 * block is kept. The buffer cache copy is always completely up-to-date.
 * The second copy, which is used only as a source for disk writes,
 * contains only the safe pointers (i.e., those that have no remaining
 * update dependencies). The second copy is freed when all pointers
 * are safe. The cache is not allowed to replace indirect blocks with
 * pending update dependencies. If a buffer containing an indirect
 * block with dependencies is written, these routines will mark it
 * dirty again. It can only be successfully written once all the
 * dependencies are removed. The ffs_fsync routine in conjunction with
 * softdep_sync_metadata work together to get all the dependencies
 * removed so that a file can be successfully written to disk. Three
 * procedures are used when setting up indirect block pointer
 * dependencies. The division is necessary because of the organization
 * of the "balloc" routine and because of the distinction between file
 * pages and file metadata blocks.
 */
1983
* Allocate a new allocindir structure.
1985
static struct allocindir *
1986
newallocindir(ip, ptrno, newblkno, oldblkno)
1987
struct inode *ip; /* inode for file being extended */
1988
int ptrno; /* offset of pointer in indirect block */
1989
ufs2_daddr_t newblkno; /* disk block number being added */
1990
ufs2_daddr_t oldblkno; /* previous block number, 0 if none */
1992
struct allocindir *aip;
1994
MALLOC(aip, struct allocindir *, sizeof(struct allocindir),
1995
M_ALLOCINDIR, M_SOFTDEP_FLAGS|M_ZERO);
1996
workitem_alloc(&aip->ai_list, D_ALLOCINDIR, UFSTOVFS(ip->i_ump));
1997
aip->ai_state = ATTACHED;
1998
aip->ai_offset = ptrno;
1999
aip->ai_newblkno = newblkno;
2000
aip->ai_oldblkno = oldblkno;
2001
aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize);
2006
* Called just before setting an indirect block pointer
2007
* to a newly allocated file page.
2010
softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
2011
struct inode *ip; /* inode for file being extended */
2012
ufs_lbn_t lbn; /* allocated block number within file */
2013
struct buf *bp; /* buffer with indirect blk referencing page */
2014
int ptrno; /* offset of pointer in indirect block */
2015
ufs2_daddr_t newblkno; /* disk block number being added */
2016
ufs2_daddr_t oldblkno; /* previous block number, 0 if none */
2017
struct buf *nbp; /* buffer holding allocated page */
2019
struct allocindir *aip;
2020
struct pagedep *pagedep;
2022
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page");
2023
aip = newallocindir(ip, ptrno, newblkno, oldblkno);
2026
* If we are allocating a directory page, then we must
2027
* allocate an associated pagedep to track additions and
2030
if ((ip->i_mode & IFMT) == IFDIR &&
2031
pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
2032
WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
2033
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
2034
setup_allocindir_phase2(bp, ip, aip);
2039
* Called just before setting an indirect block pointer to a
2040
* newly allocated indirect block.
2043
softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno)
2044
struct buf *nbp; /* newly allocated indirect block */
2045
struct inode *ip; /* inode for file being extended */
2046
struct buf *bp; /* indirect block referencing allocated block */
2047
int ptrno; /* offset of pointer in indirect block */
2048
ufs2_daddr_t newblkno; /* disk block number being added */
2050
struct allocindir *aip;
2052
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta");
2053
aip = newallocindir(ip, ptrno, newblkno, 0);
2055
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
2056
setup_allocindir_phase2(bp, ip, aip);
2061
* Called to finish the allocation of the "aip" allocated
2062
* by one of the two routines above.
2065
setup_allocindir_phase2(bp, ip, aip)
2066
struct buf *bp; /* in-memory copy of the indirect block */
2067
struct inode *ip; /* inode for file being extended */
2068
struct allocindir *aip; /* allocindir allocated by the above routines */
2070
struct worklist *wk;
2071
struct indirdep *indirdep, *newindirdep;
2072
struct bmsafemap *bmsafemap;
2073
struct allocindir *oldaip;
2074
struct freefrag *freefrag;
2075
struct newblk *newblk;
2078
mtx_assert(&lk, MA_OWNED);
2079
if (bp->b_lblkno >= 0)
2080
panic("setup_allocindir_phase2: not indir blk");
2081
for (indirdep = NULL, newindirdep = NULL; ; ) {
2082
LIST_FOREACH(wk, &bp->b_dep, wk_list) {
2083
if (wk->wk_type != D_INDIRDEP)
2085
indirdep = WK_INDIRDEP(wk);
2088
if (indirdep == NULL && newindirdep) {
2089
indirdep = newindirdep;
2090
WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
2094
if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0,
2096
panic("setup_allocindir: lost block");
2097
if (newblk->nb_state == DEPCOMPLETE) {
2098
aip->ai_state |= DEPCOMPLETE;
2101
bmsafemap = newblk->nb_bmsafemap;
2102
aip->ai_buf = bmsafemap->sm_buf;
2103
LIST_REMOVE(newblk, nb_deps);
2104
LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd,
2107
LIST_REMOVE(newblk, nb_hash);
2108
FREE(newblk, M_NEWBLK);
2109
aip->ai_indirdep = indirdep;
2111
* Check to see if there is an existing dependency
2112
* for this block. If there is, merge the old
2113
* dependency into the new one.
2115
if (aip->ai_oldblkno == 0)
2119
LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next)
2120
if (oldaip->ai_offset == aip->ai_offset)
2123
if (oldaip != NULL) {
2124
if (oldaip->ai_newblkno != aip->ai_oldblkno)
2125
panic("setup_allocindir_phase2: blkno");
2126
aip->ai_oldblkno = oldaip->ai_oldblkno;
2127
freefrag = aip->ai_freefrag;
2128
aip->ai_freefrag = oldaip->ai_freefrag;
2129
oldaip->ai_freefrag = NULL;
2130
free_allocindir(oldaip, NULL);
2132
LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
2133
if (ip->i_ump->um_fstype == UFS1)
2134
((ufs1_daddr_t *)indirdep->ir_savebp->b_data)
2135
[aip->ai_offset] = aip->ai_oldblkno;
2137
((ufs2_daddr_t *)indirdep->ir_savebp->b_data)
2138
[aip->ai_offset] = aip->ai_oldblkno;
2140
if (freefrag != NULL)
2141
handle_workitem_freefrag(freefrag);
2145
newindirdep->ir_savebp->b_flags |= B_INVAL | B_NOCACHE;
2146
brelse(newindirdep->ir_savebp);
2148
WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP);
2157
MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep),
2158
M_INDIRDEP, M_SOFTDEP_FLAGS);
2159
workitem_alloc(&newindirdep->ir_list, D_INDIRDEP,
2160
UFSTOVFS(ip->i_ump));
2161
newindirdep->ir_state = ATTACHED;
2162
if (ip->i_ump->um_fstype == UFS1)
2163
newindirdep->ir_state |= UFS1FMT;
2164
LIST_INIT(&newindirdep->ir_deplisthd);
2165
LIST_INIT(&newindirdep->ir_donehd);
2166
if (bp->b_blkno == bp->b_lblkno) {
2167
ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp,
2169
bp->b_blkno = blkno;
2171
newindirdep->ir_savebp =
2172
getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0, 0);
2173
BUF_KERNPROC(newindirdep->ir_savebp);
2174
bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount);
/*
 * Block de-allocation dependencies.
 *
 * When blocks are de-allocated, the on-disk pointers must be nullified before
 * the blocks are made available for use by other files.  (The true
 * requirement is that old pointers must be nullified before new on-disk
 * pointers are set.  We chose this slightly more stringent requirement to
 * reduce complexity.) Our implementation handles this dependency by updating
 * the inode (or indirect block) appropriately but delaying the actual block
 * de-allocation (i.e., freemap and free space count manipulation) until
 * after the updated versions reach stable storage.  After the disk is
 * updated, the blocks can be safely de-allocated whenever it is convenient.
 * This implementation handles only the common case of reducing a file's
 * length to zero. Other cases are handled by the conventional synchronous
 * write approach.
 *
 * The ffs implementation with which we worked double-checks
 * the state of the block pointers and file size as it reduces
 * a file's length.  Some of this code is replicated here in our
 * soft updates implementation.  The freeblks->fb_chkcnt field is
 * used to transfer a part of this information to the procedure
 * that eventually de-allocates the blocks.
 *
 * This routine should be called from the routine that shortens
 * a file's length, before the inode's size or block pointers
 * are modified. It will save the block pointer information for
 * later release and zero the inode so that the calling routine
 * is not forced to write the inode to stable storage prior to
 * de-allocating the blocks.
 */
2209
softdep_setup_freeblocks(ip, length, flags)
2210
struct inode *ip; /* The inode whose length is to be reduced */
2211
off_t length; /* The new length for the file */
2212
int flags; /* IO_EXT and/or IO_NORMAL */
2214
struct freeblks *freeblks;
2215
struct inodedep *inodedep;
2216
struct allocdirect *adp;
2220
ufs2_daddr_t extblocks, datablocks;
2222
int i, delay, error;
2225
mp = UFSTOVFS(ip->i_ump);
2227
panic("softdep_setup_freeblocks: non-zero length");
2228
MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
2229
M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO);
2230
workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp);
2231
freeblks->fb_state = ATTACHED;
2232
freeblks->fb_uid = ip->i_uid;
2233
freeblks->fb_previousinum = ip->i_number;
2234
freeblks->fb_devvp = ip->i_devvp;
2239
if (fs->fs_magic == FS_UFS2_MAGIC)
2240
extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
2241
datablocks = DIP(ip, i_blocks) - extblocks;
2242
if ((flags & IO_NORMAL) == 0) {
2243
freeblks->fb_oldsize = 0;
2244
freeblks->fb_chkcnt = 0;
2246
freeblks->fb_oldsize = ip->i_size;
2248
DIP_SET(ip, i_size, 0);
2249
freeblks->fb_chkcnt = datablocks;
2250
for (i = 0; i < NDADDR; i++) {
2251
freeblks->fb_dblks[i] = DIP(ip, i_db[i]);
2252
DIP_SET(ip, i_db[i], 0);
2254
for (i = 0; i < NIADDR; i++) {
2255
freeblks->fb_iblks[i] = DIP(ip, i_ib[i]);
2256
DIP_SET(ip, i_ib[i], 0);
2259
* If the file was removed, then the space being freed was
2260
* accounted for then (see softdep_releasefile()). If the
2261
* file is merely being truncated, then we account for it now.
2263
if ((ip->i_flag & IN_SPACECOUNTED) == 0) {
2264
UFS_LOCK(ip->i_ump);
2265
fs->fs_pendingblocks += datablocks;
2266
UFS_UNLOCK(ip->i_ump);
2269
if ((flags & IO_EXT) == 0) {
2270
freeblks->fb_oldextsize = 0;
2272
freeblks->fb_oldextsize = ip->i_din2->di_extsize;
2273
ip->i_din2->di_extsize = 0;
2274
freeblks->fb_chkcnt += extblocks;
2275
for (i = 0; i < NXADDR; i++) {
2276
freeblks->fb_eblks[i] = ip->i_din2->di_extb[i];
2277
ip->i_din2->di_extb[i] = 0;
2280
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - freeblks->fb_chkcnt);
2282
* Push the zero'ed inode to to its disk buffer so that we are free
2283
* to delete its dependencies below. Once the dependencies are gone
2284
* the buffer can be safely released.
2286
if ((error = bread(ip->i_devvp,
2287
fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
2288
(int)fs->fs_bsize, NOCRED, &bp)) != 0) {
2290
softdep_error("softdep_setup_freeblocks", error);
2292
if (ip->i_ump->um_fstype == UFS1)
2293
*((struct ufs1_dinode *)bp->b_data +
2294
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
2296
*((struct ufs2_dinode *)bp->b_data +
2297
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
2299
* Find and eliminate any inode dependencies.
2302
(void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
2303
if ((inodedep->id_state & IOSTARTED) != 0)
2304
panic("softdep_setup_freeblocks: inode busy");
2306
* Add the freeblks structure to the list of operations that
2307
* must await the zero'ed inode being written to disk. If we
2308
* still have a bitmap dependency (delay == 0), then the inode
2309
* has never been written to disk, so we can process the
2310
* freeblks below once we have deleted the dependencies.
2312
delay = (inodedep->id_state & DEPCOMPLETE);
2314
WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
2316
* Because the file length has been truncated to zero, any
2317
* pending block allocation dependency structures associated
2318
* with this inode are obsolete and can simply be de-allocated.
2319
* We must first merge the two dependency lists to get rid of
2320
* any duplicate freefrag structures, then purge the merged list.
2321
* If we still have a bitmap dependency, then the inode has never
2322
* been written to disk, so we can free any fragments without delay.
2324
if (flags & IO_NORMAL) {
2325
merge_inode_lists(&inodedep->id_newinoupdt,
2326
&inodedep->id_inoupdt);
2327
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
2328
free_allocdirect(&inodedep->id_inoupdt, adp, delay);
2330
if (flags & IO_EXT) {
2331
merge_inode_lists(&inodedep->id_newextupdt,
2332
&inodedep->id_extupdt);
2333
while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0)
2334
free_allocdirect(&inodedep->id_extupdt, adp, delay);
2339
* We must wait for any I/O in progress to finish so that
2340
* all potential buffers on the dirty list will be visible.
2341
* Once they are all there, walk the list and get rid of
2348
TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
2349
if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) ||
2350
((flags & IO_NORMAL) == 0 &&
2351
(bp->b_xflags & BX_ALTDATA) == 0))
2353
if ((bp = getdirtybuf(bp, VI_MTX(vp), MNT_WAIT)) == NULL)
2357
(void) inodedep_lookup(mp, ip->i_number, 0, &inodedep);
2358
deallocate_dependencies(bp, inodedep);
2360
bp->b_flags |= B_INVAL | B_NOCACHE;
2367
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
2368
(void) free_inodedep(inodedep);
2371
freeblks->fb_state |= DEPCOMPLETE;
2373
* If the inode with zeroed block pointers is now on disk
2374
* we can start freeing blocks. Add freeblks to the worklist
2375
* instead of calling handle_workitem_freeblocks directly as
2376
* it is more likely that additional IO is needed to complete
2377
* the request here than in the !delay case.
2379
if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE)
2380
add_to_worklist(&freeblks->fb_list);
2385
* If the inode has never been written to disk (delay == 0),
2386
* then we can process the freeblks now that we have deleted
2390
handle_workitem_freeblocks(freeblks, 0);
/*
 * Reclaim any dependency structures from a buffer that is about to
 * be reallocated to a new vnode. The buffer must be locked, thus,
 * no I/O completion operations can occur while we are manipulating
 * its associated dependencies. The mutex is held so that other I/O's
 * associated with related dependencies do not occur.
 */
2401
deallocate_dependencies(bp, inodedep)
2403
struct inodedep *inodedep;
2405
struct worklist *wk;
2406
struct indirdep *indirdep;
2407
struct allocindir *aip;
2408
struct pagedep *pagedep;
2409
struct dirrem *dirrem;
2413
mtx_assert(&lk, MA_OWNED);
2414
while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
2415
switch (wk->wk_type) {
2418
indirdep = WK_INDIRDEP(wk);
2420
* None of the indirect pointers will ever be visible,
2421
* so they can simply be tossed. GOINGAWAY ensures
2422
* that allocated pointers will be saved in the buffer
2423
* cache until they are freed. Note that they will
2424
* only be able to be found by their physical address
2425
* since the inode mapping the logical address will
2426
* be gone. The save buffer used for the safe copy
2427
* was allocated in setup_allocindir_phase2 using
2428
* the physical address so it could be used for this
2429
* purpose. Hence we swap the safe copy with the real
2430
* copy, allowing the safe copy to be freed and holding
2431
* on to the real copy for later use in indir_trunc.
2433
if (indirdep->ir_state & GOINGAWAY)
2434
panic("deallocate_dependencies: already gone");
2435
indirdep->ir_state |= GOINGAWAY;
2436
VFSTOUFS(bp->b_vp->v_mount)->um_numindirdeps += 1;
2437
while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0)
2438
free_allocindir(aip, inodedep);
2439
if (bp->b_lblkno >= 0 ||
2440
bp->b_blkno != indirdep->ir_savebp->b_lblkno)
2441
panic("deallocate_dependencies: not indir");
2442
bcopy(bp->b_data, indirdep->ir_savebp->b_data,
2444
WORKLIST_REMOVE(wk);
2445
WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk);
2449
pagedep = WK_PAGEDEP(wk);
2451
* None of the directory additions will ever be
2452
* visible, so they can simply be tossed.
2454
for (i = 0; i < DAHASHSZ; i++)
2456
LIST_FIRST(&pagedep->pd_diraddhd[i])))
2458
while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0)
2461
* Copy any directory remove dependencies to the list
2462
* to be processed after the zero'ed inode is written.
2463
* If the inode has already been written, then they
2464
* can be dumped directly onto the work list.
2466
LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) {
2467
LIST_REMOVE(dirrem, dm_next);
2468
dirrem->dm_dirinum = pagedep->pd_ino;
2469
if (inodedep == NULL ||
2470
(inodedep->id_state & ALLCOMPLETE) ==
2472
add_to_worklist(&dirrem->dm_list);
2474
WORKLIST_INSERT(&inodedep->id_bufwait,
2477
if ((pagedep->pd_state & NEWBLOCK) != 0) {
2478
LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list)
2479
if (wk->wk_type == D_NEWDIRBLK &&
2480
WK_NEWDIRBLK(wk)->db_pagedep ==
2484
WORKLIST_REMOVE(wk);
2485
free_newdirblk(WK_NEWDIRBLK(wk));
2487
panic("deallocate_dependencies: "
2490
WORKLIST_REMOVE(&pagedep->pd_list);
2491
LIST_REMOVE(pagedep, pd_hash);
2492
WORKITEM_FREE(pagedep, D_PAGEDEP);
2496
free_allocindir(WK_ALLOCINDIR(wk), inodedep);
2501
panic("deallocate_dependencies: Unexpected type %s",
2502
TYPENAME(wk->wk_type));
2506
panic("deallocate_dependencies: Unknown type %s",
2507
TYPENAME(wk->wk_type));
2514
* Free an allocdirect. Generate a new freefrag work request if appropriate.
2515
* This routine must be called with splbio interrupts blocked.
2518
free_allocdirect(adphead, adp, delay)
2519
struct allocdirectlst *adphead;
2520
struct allocdirect *adp;
2523
struct newdirblk *newdirblk;
2524
struct worklist *wk;
2526
mtx_assert(&lk, MA_OWNED);
2527
if ((adp->ad_state & DEPCOMPLETE) == 0)
2528
LIST_REMOVE(adp, ad_deps);
2529
TAILQ_REMOVE(adphead, adp, ad_next);
2530
if ((adp->ad_state & COMPLETE) == 0)
2531
WORKLIST_REMOVE(&adp->ad_list);
2532
if (adp->ad_freefrag != NULL) {
2534
WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
2535
&adp->ad_freefrag->ff_list);
2537
add_to_worklist(&adp->ad_freefrag->ff_list);
2539
if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) {
2540
newdirblk = WK_NEWDIRBLK(wk);
2541
WORKLIST_REMOVE(&newdirblk->db_list);
2542
if (!LIST_EMPTY(&adp->ad_newdirblk))
2543
panic("free_allocdirect: extra newdirblk");
2545
WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
2546
&newdirblk->db_list);
2548
free_newdirblk(newdirblk);
2550
WORKITEM_FREE(adp, D_ALLOCDIRECT);
2554
* Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep.
2555
* This routine must be called with splbio interrupts blocked.
2558
free_newdirblk(newdirblk)
2559
struct newdirblk *newdirblk;
2561
struct pagedep *pagedep;
2565
mtx_assert(&lk, MA_OWNED);
2567
* If the pagedep is still linked onto the directory buffer
2568
* dependency chain, then some of the entries on the
2569
* pd_pendinghd list may not be committed to disk yet. In
2570
* this case, we will simply clear the NEWBLOCK flag and
2571
* let the pd_pendinghd list be processed when the pagedep
2572
* is next written. If the pagedep is no longer on the buffer
2573
* dependency chain, then all the entries on the pd_pending
2574
* list are committed to disk and we can free them here.
2576
pagedep = newdirblk->db_pagedep;
2577
pagedep->pd_state &= ~NEWBLOCK;
2578
if ((pagedep->pd_state & ONWORKLIST) == 0)
2579
while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
2582
* If no dependencies remain, the pagedep will be freed.
2584
for (i = 0; i < DAHASHSZ; i++)
2585
if (!LIST_EMPTY(&pagedep->pd_diraddhd[i]))
2587
if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) {
2588
LIST_REMOVE(pagedep, pd_hash);
2589
WORKITEM_FREE(pagedep, D_PAGEDEP);
2591
WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
2595
* Prepare an inode to be freed. The actual free operation is not
2596
* done until the zero'ed inode has been written to disk.
2599
softdep_freefile(pvp, ino, mode)
2604
struct inode *ip = VTOI(pvp);
2605
struct inodedep *inodedep;
2606
struct freefile *freefile;
2609
* This sets up the inode de-allocation dependency.
2611
MALLOC(freefile, struct freefile *, sizeof(struct freefile),
2612
M_FREEFILE, M_SOFTDEP_FLAGS);
2613
workitem_alloc(&freefile->fx_list, D_FREEFILE, pvp->v_mount);
2614
freefile->fx_mode = mode;
2615
freefile->fx_oldinum = ino;
2616
freefile->fx_devvp = ip->i_devvp;
2617
if ((ip->i_flag & IN_SPACECOUNTED) == 0) {
2618
UFS_LOCK(ip->i_ump);
2619
ip->i_fs->fs_pendinginodes += 1;
2620
UFS_UNLOCK(ip->i_ump);
2624
* If the inodedep does not exist, then the zero'ed inode has
2625
* been written to disk. If the allocated inode has never been
2626
* written to disk, then the on-disk inode is zero'ed. In either
2627
* case we can free the file immediately.
2630
if (inodedep_lookup(pvp->v_mount, ino, 0, &inodedep) == 0 ||
2631
check_inode_unwritten(inodedep)) {
2633
handle_workitem_freefile(freefile);
2636
WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
2638
if (ip->i_number == ino)
2639
ip->i_flag |= IN_MODIFIED;
2643
* Check to see if an inode has never been written to disk. If
2644
* so free the inodedep and return success, otherwise return failure.
2645
* This routine must be called with splbio interrupts blocked.
2647
* If we still have a bitmap dependency, then the inode has never
2648
* been written to disk. Drop the dependency as it is no longer
2649
* necessary since the inode is being deallocated. We set the
2650
* ALLCOMPLETE flags since the bitmap now properly shows that the
2651
* inode is not allocated. Even if the inode is actively being
2652
* written, it has been rolled back to its zero'ed state, so we
2653
* are ensured that a zero inode is what is on the disk. For short
2654
* lived files, this change will usually result in removing all the
2655
* dependencies from the inode so that it can be freed immediately.
2658
check_inode_unwritten(inodedep)
2659
struct inodedep *inodedep;
2662
mtx_assert(&lk, MA_OWNED);
2663
if ((inodedep->id_state & DEPCOMPLETE) != 0 ||
2664
!LIST_EMPTY(&inodedep->id_pendinghd) ||
2665
!LIST_EMPTY(&inodedep->id_bufwait) ||
2666
!LIST_EMPTY(&inodedep->id_inowait) ||
2667
!TAILQ_EMPTY(&inodedep->id_inoupdt) ||
2668
!TAILQ_EMPTY(&inodedep->id_newinoupdt) ||
2669
!TAILQ_EMPTY(&inodedep->id_extupdt) ||
2670
!TAILQ_EMPTY(&inodedep->id_newextupdt) ||
2671
inodedep->id_nlinkdelta != 0)
2675
* Another process might be in initiate_write_inodeblock_ufs[12]
2676
* trying to allocate memory without holding "Softdep Lock".
2678
if ((inodedep->id_state & IOSTARTED) != 0 &&
2679
inodedep->id_savedino1 == NULL)
2682
inodedep->id_state |= ALLCOMPLETE;
2683
LIST_REMOVE(inodedep, id_deps);
2684
inodedep->id_buf = NULL;
2685
if (inodedep->id_state & ONWORKLIST)
2686
WORKLIST_REMOVE(&inodedep->id_list);
2687
if (inodedep->id_savedino1 != NULL) {
2688
FREE(inodedep->id_savedino1, M_SAVEDINO);
2689
inodedep->id_savedino1 = NULL;
2691
if (free_inodedep(inodedep) == 0)
2692
panic("check_inode_unwritten: busy inode");
2697
* Try to free an inodedep structure. Return 1 if it could be freed.
2700
free_inodedep(inodedep)
2701
struct inodedep *inodedep;
2704
mtx_assert(&lk, MA_OWNED);
2705
if ((inodedep->id_state & ONWORKLIST) != 0 ||
2706
(inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
2707
!LIST_EMPTY(&inodedep->id_pendinghd) ||
2708
!LIST_EMPTY(&inodedep->id_bufwait) ||
2709
!LIST_EMPTY(&inodedep->id_inowait) ||
2710
!TAILQ_EMPTY(&inodedep->id_inoupdt) ||
2711
!TAILQ_EMPTY(&inodedep->id_newinoupdt) ||
2712
!TAILQ_EMPTY(&inodedep->id_extupdt) ||
2713
!TAILQ_EMPTY(&inodedep->id_newextupdt) ||
2714
inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
2716
LIST_REMOVE(inodedep, id_hash);
2717
WORKITEM_FREE(inodedep, D_INODEDEP);
2723
* This workitem routine performs the block de-allocation.
2724
* The workitem is added to the pending list after the updated
2725
* inode block has been written to disk. As mentioned above,
2726
* checks regarding the number of blocks de-allocated (compared
2727
* to the number of blocks allocated for the file) are also
2728
* performed in this function.
2731
handle_workitem_freeblocks(freeblks, flags)
2732
struct freeblks *freeblks;
2738
struct ufsmount *ump;
2739
int i, nblocks, level, bsize;
2740
ufs2_daddr_t bn, blocksreleased = 0;
2741
int error, allerror = 0;
2742
ufs_lbn_t baselbns[NIADDR], tmpval;
2743
int fs_pendingblocks;
2745
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
2747
fs_pendingblocks = 0;
2749
baselbns[0] = NDADDR;
2750
for (i = 1; i < NIADDR; i++) {
2751
tmpval *= NINDIR(fs);
2752
baselbns[i] = baselbns[i - 1] + tmpval;
2754
nblocks = btodb(fs->fs_bsize);
2757
* Release all extended attribute blocks or frags.
2759
if (freeblks->fb_oldextsize > 0) {
2760
for (i = (NXADDR - 1); i >= 0; i--) {
2761
if ((bn = freeblks->fb_eblks[i]) == 0)
2763
bsize = sblksize(fs, freeblks->fb_oldextsize, i);
2764
ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize,
2765
freeblks->fb_previousinum);
2766
blocksreleased += btodb(bsize);
2770
* Release all data blocks or frags.
2772
if (freeblks->fb_oldsize > 0) {
2774
* Indirect blocks first.
2776
for (level = (NIADDR - 1); level >= 0; level--) {
2777
if ((bn = freeblks->fb_iblks[level]) == 0)
2779
if ((error = indir_trunc(freeblks, fsbtodb(fs, bn),
2780
level, baselbns[level], &blocksreleased)) != 0)
2782
ffs_blkfree(ump, fs, freeblks->fb_devvp, bn,
2783
fs->fs_bsize, freeblks->fb_previousinum);
2784
fs_pendingblocks += nblocks;
2785
blocksreleased += nblocks;
2788
* All direct blocks or frags.
2790
for (i = (NDADDR - 1); i >= 0; i--) {
2791
if ((bn = freeblks->fb_dblks[i]) == 0)
2793
bsize = sblksize(fs, freeblks->fb_oldsize, i);
2794
ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize,
2795
freeblks->fb_previousinum);
2796
fs_pendingblocks += btodb(bsize);
2797
blocksreleased += btodb(bsize);
2801
fs->fs_pendingblocks -= fs_pendingblocks;
2804
* If we still have not finished background cleanup, then check
2805
* to see if the block count needs to be adjusted.
2807
if (freeblks->fb_chkcnt != blocksreleased &&
2808
(fs->fs_flags & FS_UNCLEAN) != 0 &&
2809
ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_previousinum,
2810
(flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ)
2813
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + \
2814
freeblks->fb_chkcnt - blocksreleased);
2815
ip->i_flag |= IN_CHANGE;
2820
if (freeblks->fb_chkcnt != blocksreleased &&
2821
((fs->fs_flags & FS_UNCLEAN) == 0 || (flags & LK_NOWAIT) != 0))
2822
printf("handle_workitem_freeblocks: block count\n");
2824
softdep_error("handle_workitem_freeblks", allerror);
2825
#endif /* INVARIANTS */
2828
WORKITEM_FREE(freeblks, D_FREEBLKS);
2834
* Release blocks associated with the inode ip and stored in the indirect
2835
* block dbn. If level is greater than SINGLE, the block is an indirect block
2836
* and recursive calls to indirtrunc must be used to cleanse other indirect
2840
indir_trunc(freeblks, dbn, level, lbn, countp)
2841
struct freeblks *freeblks;
2845
ufs2_daddr_t *countp;
2849
struct worklist *wk;
2850
struct indirdep *indirdep;
2851
struct ufsmount *ump;
2852
ufs1_daddr_t *bap1 = 0;
2853
ufs2_daddr_t nb, *bap2 = 0;
2855
int i, nblocks, ufs1fmt;
2856
int error, allerror = 0;
2857
int fs_pendingblocks;
2859
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
2861
fs_pendingblocks = 0;
2863
for (i = level; i > 0; i--)
2864
lbnadd *= NINDIR(fs);
2866
* Get buffer of block pointers to be freed. This routine is not
2867
* called until the zero'ed inode has been written, so it is safe
2868
* to free blocks as they are encountered. Because the inode has
2869
* been zero'ed, calls to bmap on these blocks will fail. So, we
2870
* have to use the on-disk address and the block device for the
2871
* filesystem to look them up. If the file was deleted before its
2872
* indirect blocks were all written to disk, the routine that set
2873
* us up (deallocate_dependencies) will have arranged to leave
2874
* a complete copy of the indirect block in memory for our use.
2875
* Otherwise we have to read the blocks in from the disk.
2878
bp = getblk(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, 0, 0,
2881
bp = incore(&freeblks->fb_devvp->v_bufobj, dbn);
2884
if (bp != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) {
2885
if (wk->wk_type != D_INDIRDEP ||
2886
(indirdep = WK_INDIRDEP(wk))->ir_savebp != bp ||
2887
(indirdep->ir_state & GOINGAWAY) == 0)
2888
panic("indir_trunc: lost indirdep");
2889
WORKLIST_REMOVE(wk);
2890
WORKITEM_FREE(indirdep, D_INDIRDEP);
2891
if (!LIST_EMPTY(&bp->b_dep))
2892
panic("indir_trunc: dangling dep");
2893
ump->um_numindirdeps -= 1;
2901
error = bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize,
2909
* Recursively free indirect blocks.
2911
if (ump->um_fstype == UFS1) {
2913
bap1 = (ufs1_daddr_t *)bp->b_data;
2916
bap2 = (ufs2_daddr_t *)bp->b_data;
2918
nblocks = btodb(fs->fs_bsize);
2919
for (i = NINDIR(fs) - 1; i >= 0; i--) {
2927
if ((error = indir_trunc(freeblks, fsbtodb(fs, nb),
2928
level - 1, lbn + (i * lbnadd), countp)) != 0)
2931
ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, fs->fs_bsize,
2932
freeblks->fb_previousinum);
2933
fs_pendingblocks += nblocks;
2937
fs->fs_pendingblocks -= fs_pendingblocks;
2939
bp->b_flags |= B_INVAL | B_NOCACHE;
2945
* Free an allocindir.
2946
* This routine must be called with splbio interrupts blocked.
2949
free_allocindir(aip, inodedep)
2950
struct allocindir *aip;
2951
struct inodedep *inodedep;
2953
struct freefrag *freefrag;
2955
mtx_assert(&lk, MA_OWNED);
2956
if ((aip->ai_state & DEPCOMPLETE) == 0)
2957
LIST_REMOVE(aip, ai_deps);
2958
if (aip->ai_state & ONWORKLIST)
2959
WORKLIST_REMOVE(&aip->ai_list);
2960
LIST_REMOVE(aip, ai_next);
2961
if ((freefrag = aip->ai_freefrag) != NULL) {
2962
if (inodedep == NULL)
2963
add_to_worklist(&freefrag->ff_list);
2965
WORKLIST_INSERT(&inodedep->id_bufwait,
2966
&freefrag->ff_list);
2968
WORKITEM_FREE(aip, D_ALLOCINDIR);
2972
* Directory entry addition dependencies.
2974
* When adding a new directory entry, the inode (with its incremented link
2975
* count) must be written to disk before the directory entry's pointer to it.
2976
* Also, if the inode is newly allocated, the corresponding freemap must be
2977
* updated (on disk) before the directory entry's pointer. These requirements
2978
* are met via undo/redo on the directory entry's pointer, which consists
2979
* simply of the inode number.
2981
* As directory entries are added and deleted, the free space within a
2982
* directory block can become fragmented. The ufs filesystem will compact
2983
* a fragmented directory block to make space for a new entry. When this
2984
* occurs, the offsets of previously added entries change. Any "diradd"
2985
* dependency structures corresponding to these entries must be updated with
2990
* This routine is called after the in-memory inode's link
2991
* count has been incremented, but before the directory entry's
2992
* pointer to the inode has been set.
2995
softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
2996
struct buf *bp; /* buffer containing directory block */
2997
struct inode *dp; /* inode for directory */
2998
off_t diroffset; /* offset of new entry in directory */
2999
ino_t newinum; /* inode referenced by new directory entry */
3000
struct buf *newdirbp; /* non-NULL => contents of new mkdir */
3001
int isnewblk; /* entry is in a newly allocated block */
3003
int offset; /* offset of new entry within directory block */
3004
ufs_lbn_t lbn; /* block in directory containing new entry */
3007
struct allocdirect *adp;
3008
struct pagedep *pagedep;
3009
struct inodedep *inodedep;
3010
struct newdirblk *newdirblk = 0;
3011
struct mkdir *mkdir1, *mkdir2;
3015
* Whiteouts have no dependencies.
3017
if (newinum == WINO) {
3018
if (newdirbp != NULL)
3022
mp = UFSTOVFS(dp->i_ump);
3024
lbn = lblkno(fs, diroffset);
3025
offset = blkoff(fs, diroffset);
3026
MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD,
3027
M_SOFTDEP_FLAGS|M_ZERO);
3028
workitem_alloc(&dap->da_list, D_DIRADD, mp);
3029
dap->da_offset = offset;
3030
dap->da_newinum = newinum;
3031
dap->da_state = ATTACHED;
3032
if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) {
3033
MALLOC(newdirblk, struct newdirblk *, sizeof(struct newdirblk),
3034
M_NEWDIRBLK, M_SOFTDEP_FLAGS);
3035
workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp);
3037
if (newdirbp == NULL) {
3038
dap->da_state |= DEPCOMPLETE;
3041
dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
3042
MALLOC(mkdir1, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
3044
workitem_alloc(&mkdir1->md_list, D_MKDIR, mp);
3045
mkdir1->md_state = MKDIR_BODY;
3046
mkdir1->md_diradd = dap;
3047
MALLOC(mkdir2, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
3049
workitem_alloc(&mkdir2->md_list, D_MKDIR, mp);
3050
mkdir2->md_state = MKDIR_PARENT;
3051
mkdir2->md_diradd = dap;
3053
* Dependency on "." and ".." being written to disk.
3055
mkdir1->md_buf = newdirbp;
3057
LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
3058
WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list);
3062
* Dependency on link count increase for parent directory
3065
if (inodedep_lookup(mp, dp->i_number, 0, &inodedep) == 0
3066
|| (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
3067
dap->da_state &= ~MKDIR_PARENT;
3068
WORKITEM_FREE(mkdir2, D_MKDIR);
3070
LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
3071
WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list);
3075
* Link into parent directory pagedep to await its being written.
3077
if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
3078
WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
3079
dap->da_pagedep = pagedep;
3080
LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
3083
* Link into its inodedep. Put it on the id_bufwait list if the inode
3084
* is not yet written. If it is written, do the post-inode write
3085
* processing to put it on the id_pendinghd list.
3087
(void) inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
3088
if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE)
3089
diradd_inode_written(dap, inodedep);
3091
WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
3094
* Directories growing into indirect blocks are rare
3095
* enough and the frequency of new block allocation
3096
* in those cases even more rare, that we choose not
3097
* to bother tracking them. Rather we simply force the
3098
* new directory entry to disk.
3100
if (lbn >= NDADDR) {
3103
* We only have a new allocation when at the
3104
* beginning of a new block, not when we are
3105
* expanding into an existing block.
3107
if (blkoff(fs, diroffset) == 0)
3112
* We only have a new allocation when at the beginning
3113
* of a new fragment, not when we are expanding into an
3114
* existing fragment. Also, there is nothing to do if we
3115
* are already tracking this block.
3117
if (fragoff(fs, diroffset) != 0) {
3121
if ((pagedep->pd_state & NEWBLOCK) != 0) {
3122
WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
3127
* Find our associated allocdirect and have it track us.
3129
if (inodedep_lookup(mp, dp->i_number, 0, &inodedep) == 0)
3130
panic("softdep_setup_directory_add: lost inodedep");
3131
adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst);
3132
if (adp == NULL || adp->ad_lbn != lbn)
3133
panic("softdep_setup_directory_add: lost entry");
3134
pagedep->pd_state |= NEWBLOCK;
3135
newdirblk->db_pagedep = pagedep;
3136
WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list);
3143
* This procedure is called to change the offset of a directory
3144
* entry when compacting a directory block which must be owned
3145
* exclusively by the caller. Note that the actual entry movement
3146
* must be done in this procedure to ensure that no I/O completions
3147
* occur while the move is in progress.
3150
softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize)
3151
struct inode *dp; /* inode for directory */
3152
caddr_t base; /* address of dp->i_offset */
3153
caddr_t oldloc; /* address of old directory location */
3154
caddr_t newloc; /* address of new directory location */
3155
int entrysize; /* size of directory entry */
3157
int offset, oldoffset, newoffset;
3158
struct pagedep *pagedep;
3163
lbn = lblkno(dp->i_fs, dp->i_offset);
3164
offset = blkoff(dp->i_fs, dp->i_offset);
3165
if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0)
3167
oldoffset = offset + (oldloc - base);
3168
newoffset = offset + (newloc - base);
3170
LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) {
3171
if (dap->da_offset != oldoffset)
3173
dap->da_offset = newoffset;
3174
if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset))
3176
LIST_REMOVE(dap, da_pdlist);
3177
LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)],
3183
LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) {
3184
if (dap->da_offset == oldoffset) {
3185
dap->da_offset = newoffset;
3191
bcopy(oldloc, newloc, entrysize);
3196
* Free a diradd dependency structure. This routine must be called
3197
* with splbio interrupts blocked.
3203
struct dirrem *dirrem;
3204
struct pagedep *pagedep;
3205
struct inodedep *inodedep;
3206
struct mkdir *mkdir, *nextmd;
3208
mtx_assert(&lk, MA_OWNED);
3209
WORKLIST_REMOVE(&dap->da_list);
3210
LIST_REMOVE(dap, da_pdlist);
3211
if ((dap->da_state & DIRCHG) == 0) {
3212
pagedep = dap->da_pagedep;
3214
dirrem = dap->da_previous;
3215
pagedep = dirrem->dm_pagedep;
3216
dirrem->dm_dirinum = pagedep->pd_ino;
3217
add_to_worklist(&dirrem->dm_list);
3219
if (inodedep_lookup(pagedep->pd_list.wk_mp, dap->da_newinum,
3221
(void) free_inodedep(inodedep);
3222
if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
3223
for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
3224
nextmd = LIST_NEXT(mkdir, md_mkdirs);
3225
if (mkdir->md_diradd != dap)
3227
dap->da_state &= ~mkdir->md_state;
3228
WORKLIST_REMOVE(&mkdir->md_list);
3229
LIST_REMOVE(mkdir, md_mkdirs);
3230
WORKITEM_FREE(mkdir, D_MKDIR);
3232
if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0)
3233
panic("free_diradd: unfound ref");
3235
WORKITEM_FREE(dap, D_DIRADD);
3239
* Directory entry removal dependencies.
3241
* When removing a directory entry, the entry's inode pointer must be
3242
* zero'ed on disk before the corresponding inode's link count is decremented
3243
* (possibly freeing the inode for re-use). This dependency is handled by
3244
* updating the directory entry but delaying the inode count reduction until
3245
* after the directory block has been written to disk. After this point, the
3246
* inode count can be decremented whenever it is convenient.
3250
* This routine should be called immediately after removing
3251
* a directory entry. The inode's link count should not be
3252
* decremented by the calling procedure -- the soft updates
3253
* code will do this task when it is safe.
3256
softdep_setup_remove(bp, dp, ip, isrmdir)
3257
struct buf *bp; /* buffer containing directory block */
3258
struct inode *dp; /* inode for the directory being modified */
3259
struct inode *ip; /* inode for directory entry being removed */
3260
int isrmdir; /* indicates if doing RMDIR */
3262
struct dirrem *dirrem, *prevdirrem;
3265
* Allocate a new dirrem if appropriate and ACQUIRE_LOCK.
3267
dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
3270
* If the COMPLETE flag is clear, then there were no active
3271
* entries and we want to roll back to a zeroed entry until
3272
* the new inode is committed to disk. If the COMPLETE flag is
3273
* set then we have deleted an entry that never made it to
3274
* disk. If the entry we deleted resulted from a name change,
3275
* then the old name still resides on disk. We cannot delete
3276
* its inode (returned to us in prevdirrem) until the zeroed
3277
* directory entry gets to disk. The new inode has never been
3278
* referenced on the disk, so can be deleted immediately.
3280
if ((dirrem->dm_state & COMPLETE) == 0) {
3281
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
3285
if (prevdirrem != NULL)
3286
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
3287
prevdirrem, dm_next);
3288
dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
3290
handle_workitem_remove(dirrem, NULL);
3295
* Allocate a new dirrem if appropriate and return it along with
3296
* its associated pagedep. Called without a lock, returns with lock.
3298
static long num_dirrem; /* number of dirrem allocated */
3299
static struct dirrem *
3300
newdirrem(bp, dp, ip, isrmdir, prevdirremp)
3301
struct buf *bp; /* buffer containing directory block */
3302
struct inode *dp; /* inode for the directory being modified */
3303
struct inode *ip; /* inode for directory entry being removed */
3304
int isrmdir; /* indicates if doing RMDIR */
3305
struct dirrem **prevdirremp; /* previously referenced inode, if any */
3310
struct dirrem *dirrem;
3311
struct pagedep *pagedep;
3314
* Whiteouts have no deletion dependencies.
3317
panic("newdirrem: whiteout");
3319
* If we are over our limit, try to improve the situation.
3320
* Limiting the number of dirrem structures will also limit
3321
* the number of freefile and freeblks structures.
3324
if (!(ip->i_flags & SF_SNAPSHOT) && num_dirrem > max_softdeps / 2)
3325
(void) request_cleanup(ITOV(dp)->v_mount, FLUSH_REMOVE);
3328
MALLOC(dirrem, struct dirrem *, sizeof(struct dirrem),
3329
M_DIRREM, M_SOFTDEP_FLAGS|M_ZERO);
3330
workitem_alloc(&dirrem->dm_list, D_DIRREM, ITOV(dp)->v_mount);
3331
dirrem->dm_state = isrmdir ? RMDIR : 0;
3332
dirrem->dm_oldinum = ip->i_number;
3333
*prevdirremp = NULL;
3336
lbn = lblkno(dp->i_fs, dp->i_offset);
3337
offset = blkoff(dp->i_fs, dp->i_offset);
3338
if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
3339
WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
3340
dirrem->dm_pagedep = pagedep;
3342
* Check for a diradd dependency for the same directory entry.
3343
* If present, then both dependencies become obsolete and can
3344
* be de-allocated. Check for an entry on both the pd_dirraddhd
3345
* list and the pd_pendinghd list.
3348
LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist)
3349
if (dap->da_offset == offset)
3353
LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
3354
if (dap->da_offset == offset)
3360
* Must be ATTACHED at this point.
3362
if ((dap->da_state & ATTACHED) == 0)
3363
panic("newdirrem: not ATTACHED");
3364
if (dap->da_newinum != ip->i_number)
3365
panic("newdirrem: inum %d should be %d",
3366
ip->i_number, dap->da_newinum);
3368
* If we are deleting a changed name that never made it to disk,
3369
* then return the dirrem describing the previous inode (which
3370
* represents the inode currently referenced from this entry on disk).
3372
if ((dap->da_state & DIRCHG) != 0) {
3373
*prevdirremp = dap->da_previous;
3374
dap->da_state &= ~DIRCHG;
3375
dap->da_pagedep = pagedep;
3378
* We are deleting an entry that never made it to disk.
3379
* Mark it COMPLETE so we can delete its inode immediately.
3381
dirrem->dm_state |= COMPLETE;
3387
* Directory entry change dependencies.
3389
* Changing an existing directory entry requires that an add operation
3390
* be completed first followed by a deletion. The semantics for the addition
3391
* are identical to the description of adding a new entry above except
3392
* that the rollback is to the old inode number rather than zero. Once
3393
* the addition dependency is completed, the removal is done as described
3394
* in the removal routine above.
3398
* This routine should be called immediately after changing
3399
* a directory entry. The inode's link count should not be
3400
* decremented by the calling procedure -- the soft updates
3401
* code will perform this task when it is safe.
3404
softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
3405
struct buf *bp; /* buffer containing directory block */
3406
struct inode *dp; /* inode for the directory being modified */
3407
struct inode *ip; /* inode for directory entry being removed */
3408
ino_t newinum; /* new inode number for changed entry */
3409
int isrmdir; /* indicates if doing RMDIR */
3412
struct diradd *dap = NULL;
3413
struct dirrem *dirrem, *prevdirrem;
3414
struct pagedep *pagedep;
3415
struct inodedep *inodedep;
3418
offset = blkoff(dp->i_fs, dp->i_offset);
3419
mp = UFSTOVFS(dp->i_ump);
3422
* Whiteouts do not need diradd dependencies.
3424
if (newinum != WINO) {
3425
MALLOC(dap, struct diradd *, sizeof(struct diradd),
3426
M_DIRADD, M_SOFTDEP_FLAGS|M_ZERO);
3427
workitem_alloc(&dap->da_list, D_DIRADD, mp);
3428
dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
3429
dap->da_offset = offset;
3430
dap->da_newinum = newinum;
3434
* Allocate a new dirrem and ACQUIRE_LOCK.
3436
dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
3437
pagedep = dirrem->dm_pagedep;
3439
* The possible values for isrmdir:
3440
* 0 - non-directory file rename
3441
* 1 - directory rename within same directory
3442
* inum - directory rename to new directory of given inode number
3443
* When renaming to a new directory, we are both deleting and
3444
* creating a new directory entry, so the link count on the new
3445
* directory should not change. Thus we do not need the followup
3446
* dirrem which is usually done in handle_workitem_remove. We set
3447
* the DIRCHG flag to tell handle_workitem_remove to skip the
3451
dirrem->dm_state |= DIRCHG;
3454
* Whiteouts have no additional dependencies,
3455
* so just put the dirrem on the correct list.
3457
if (newinum == WINO) {
3458
if ((dirrem->dm_state & COMPLETE) == 0) {
3459
LIST_INSERT_HEAD(&pagedep->pd_dirremhd, dirrem,
3462
dirrem->dm_dirinum = pagedep->pd_ino;
3463
add_to_worklist(&dirrem->dm_list);
3470
* If the COMPLETE flag is clear, then there were no active
3471
* entries and we want to roll back to the previous inode until
3472
* the new inode is committed to disk. If the COMPLETE flag is
3473
* set, then we have deleted an entry that never made it to disk.
3474
* If the entry we deleted resulted from a name change, then the old
3475
* inode reference still resides on disk. Any rollback that we do
3476
* needs to be to that old inode (returned to us in prevdirrem). If
3477
* the entry we deleted resulted from a create, then there is
3478
* no entry on the disk, so we want to roll back to zero rather
3479
* than the uncommitted inode. In either of the COMPLETE cases we
3480
* want to immediately free the unwritten and unreferenced inode.
3482
if ((dirrem->dm_state & COMPLETE) == 0) {
3483
dap->da_previous = dirrem;
3485
if (prevdirrem != NULL) {
3486
dap->da_previous = prevdirrem;
3488
dap->da_state &= ~DIRCHG;
3489
dap->da_pagedep = pagedep;
3491
dirrem->dm_dirinum = pagedep->pd_ino;
3492
add_to_worklist(&dirrem->dm_list);
3495
* Link into its inodedep. Put it on the id_bufwait list if the inode
3496
* is not yet written. If it is written, do the post-inode write
3497
* processing to put it on the id_pendinghd list.
3499
if (inodedep_lookup(mp, newinum, DEPALLOC, &inodedep) == 0 ||
3500
(inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
3501
dap->da_state |= COMPLETE;
3502
LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
3503
WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
3505
LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
3507
WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
3513
* Called whenever the link count on an inode is changed.
3514
* It creates an inode dependency so that the new reference(s)
3515
* to the inode cannot be committed to disk until the updated
3516
* inode has been written.
3519
softdep_change_linkcnt(ip)
3520
struct inode *ip; /* the inode with the increased link count */
3522
struct inodedep *inodedep;
3525
(void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number,
3526
DEPALLOC, &inodedep);
3527
if (ip->i_nlink < ip->i_effnlink)
3528
panic("softdep_change_linkcnt: bad delta");
3529
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
3534
* Called when the effective link count and the reference count
3535
* on an inode drops to zero. At this point there are no names
3536
* referencing the file in the filesystem and no active file
3537
* references. The space associated with the file will be freed
3538
* as soon as the necessary soft dependencies are cleared.
3541
softdep_releasefile(ip)
3542
struct inode *ip; /* inode with the zero effective link count */
3544
struct inodedep *inodedep;
3548
if (ip->i_effnlink > 0)
3549
panic("softdep_releasefile: file still referenced");
3551
* We may be called several times as the on-disk link count
3552
* drops to zero. We only want to account for the space once.
3554
if (ip->i_flag & IN_SPACECOUNTED)
3557
* We have to deactivate a snapshot otherwise copyonwrites may
3558
* add blocks and the cleanup may remove blocks after we have
3559
* tried to account for them.
3561
if ((ip->i_flags & SF_SNAPSHOT) != 0)
3562
ffs_snapremove(ITOV(ip));
3564
* If we are tracking an nlinkdelta, we have to also remember
3565
* whether we accounted for the freed space yet.
3568
if ((inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, &inodedep)))
3569
inodedep->id_state |= SPACECOUNTED;
3573
if (fs->fs_magic == FS_UFS2_MAGIC)
3574
extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
3575
UFS_LOCK(ip->i_ump);
3576
ip->i_fs->fs_pendingblocks += DIP(ip, i_blocks) - extblocks;
3577
ip->i_fs->fs_pendinginodes += 1;
3578
UFS_UNLOCK(ip->i_ump);
3579
ip->i_flag |= IN_SPACECOUNTED;
3583
* This workitem decrements the inode's link count.
3584
* If the link count reaches zero, the file is removed.
3587
handle_workitem_remove(dirrem, xp)
3588
struct dirrem *dirrem;
3591
struct thread *td = curthread;
3592
struct inodedep *inodedep;
3598
if ((vp = xp) == NULL &&
3599
(error = ffs_vgetf(dirrem->dm_list.wk_mp,
3600
dirrem->dm_oldinum, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ)) != 0) {
3601
softdep_error("handle_workitem_remove: vget", error);
3606
if ((inodedep_lookup(dirrem->dm_list.wk_mp,
3607
dirrem->dm_oldinum, 0, &inodedep)) == 0)
3608
panic("handle_workitem_remove: lost inodedep");
3610
* Normal file deletion.
3612
if ((dirrem->dm_state & RMDIR) == 0) {
3614
DIP_SET(ip, i_nlink, ip->i_nlink);
3615
ip->i_flag |= IN_CHANGE;
3616
if (ip->i_nlink < ip->i_effnlink)
3617
panic("handle_workitem_remove: bad file delta");
3618
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
3620
WORKITEM_FREE(dirrem, D_DIRREM);
3626
* Directory deletion. Decrement reference count for both the
3627
* just deleted parent directory entry and the reference for ".".
3628
* Next truncate the directory to length zero. When the
3629
* truncation completes, arrange to have the reference count on
3630
* the parent decremented to account for the loss of "..".
3633
DIP_SET(ip, i_nlink, ip->i_nlink);
3634
ip->i_flag |= IN_CHANGE;
3635
if (ip->i_nlink < ip->i_effnlink)
3636
panic("handle_workitem_remove: bad dir delta");
3637
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
3639
if ((error = ffs_truncate(vp, (off_t)0, 0, td->td_ucred, td)) != 0)
3640
softdep_error("handle_workitem_remove: truncate", error);
3643
* Rename a directory to a new parent. Since, we are both deleting
3644
* and creating a new directory entry, the link count on the new
3645
* directory should not change. Thus we skip the followup dirrem.
3647
if (dirrem->dm_state & DIRCHG) {
3649
WORKITEM_FREE(dirrem, D_DIRREM);
3655
* If the inodedep does not exist, then the zero'ed inode has
3656
* been written to disk. If the allocated inode has never been
3657
* written to disk, then the on-disk inode is zero'ed. In either
3658
* case we can remove the file immediately.
3660
dirrem->dm_state = 0;
3661
oldinum = dirrem->dm_oldinum;
3662
dirrem->dm_oldinum = dirrem->dm_dirinum;
3663
if (inodedep_lookup(dirrem->dm_list.wk_mp, oldinum,
3664
0, &inodedep) == 0 || check_inode_unwritten(inodedep)) {
3666
add_to_worklist(&dirrem->dm_list);
3670
handle_workitem_remove(dirrem, NULL);
3673
WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
3675
ip->i_flag |= IN_CHANGE;
3681
* Inode de-allocation dependencies.
3683
* When an inode's link count is reduced to zero, it can be de-allocated. We
3684
* found it convenient to postpone de-allocation until after the inode is
3685
* written to disk with its new link count (zero). At this point, all of the
3686
* on-disk inode's block pointers are nullified and, with careful dependency
3687
* list ordering, all dependencies related to the inode will be satisfied and
3688
* the corresponding dependency structures de-allocated. So, if/when the
3689
* inode is reused, there will be no mixing of old dependencies with new
3690
* ones. This artificial dependency is set up by the block de-allocation
3691
* procedure above (softdep_setup_freeblocks) and completed by the
3692
* following procedure.
3695
handle_workitem_freefile(freefile)
3696
struct freefile *freefile;
3699
struct inodedep *idp;
3700
struct ufsmount *ump;
3703
ump = VFSTOUFS(freefile->fx_list.wk_mp);
3707
error = inodedep_lookup(UFSTOVFS(ump), freefile->fx_oldinum, 0, &idp);
3710
panic("handle_workitem_freefile: inodedep survived");
3713
fs->fs_pendinginodes -= 1;
3715
if ((error = ffs_freefile(ump, fs, freefile->fx_devvp,
3716
freefile->fx_oldinum, freefile->fx_mode)) != 0)
3717
softdep_error("handle_workitem_freefile", error);
3719
WORKITEM_FREE(freefile, D_FREEFILE);
3725
* Helper function which unlinks marker element from work list and returns
3726
* the next element on the list.
3728
static __inline struct worklist *
3729
markernext(struct worklist *marker)
3731
struct worklist *next;
3733
next = LIST_NEXT(marker, wk_list);
3734
LIST_REMOVE(marker, wk_list);
3741
* The dependency structures constructed above are most actively used when file
3742
* system blocks are written to disk. No constraints are placed on when a
3743
* block can be written, but unsatisfied update dependencies are made safe by
3744
* modifying (or replacing) the source memory for the duration of the disk
3745
* write. When the disk write completes, the memory block is again brought
3748
* In-core inode structure reclamation.
3750
* Because there are a finite number of "in-core" inode structures, they are
3751
* reused regularly. By transferring all inode-related dependencies to the
3752
* in-memory inode block and indexing them separately (via "inodedep"s), we
3753
* can allow "in-core" inode structures to be reused at any time and avoid
3754
* any increase in contention.
3756
* Called just before entering the device driver to initiate a new disk I/O.
3757
* The buffer must be locked, thus, no I/O completion operations can occur
3758
* while we are manipulating its associated dependencies.
3761
softdep_disk_io_initiation(bp)
3762
struct buf *bp; /* structure describing disk write to occur */
3764
struct worklist *wk;
3765
struct worklist marker;
3766
struct indirdep *indirdep;
3767
struct inodedep *inodedep;
3770
* We only care about write operations. There should never
3771
* be dependencies for reads.
3773
if (bp->b_iocmd != BIO_WRITE)
3774
panic("softdep_disk_io_initiation: not write");
3776
marker.wk_type = D_LAST + 1; /* Not a normal workitem */
3777
PHOLD(curproc); /* Don't swap out kernel stack */
3781
* Do any necessary pre-I/O processing.
3783
for (wk = LIST_FIRST(&bp->b_dep); wk != NULL;
3784
wk = markernext(&marker)) {
3785
LIST_INSERT_AFTER(wk, &marker, wk_list);
3786
switch (wk->wk_type) {
3789
initiate_write_filepage(WK_PAGEDEP(wk), bp);
3793
inodedep = WK_INODEDEP(wk);
3794
if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC)
3795
initiate_write_inodeblock_ufs1(inodedep, bp);
3797
initiate_write_inodeblock_ufs2(inodedep, bp);
3801
indirdep = WK_INDIRDEP(wk);
3802
if (indirdep->ir_state & GOINGAWAY)
3803
panic("disk_io_initiation: indirdep gone");
3805
* If there are no remaining dependencies, this
3806
* will be writing the real pointers, so the
3807
* dependency can be freed.
3809
if (LIST_EMPTY(&indirdep->ir_deplisthd)) {
3812
bp = indirdep->ir_savebp;
3813
bp->b_flags |= B_INVAL | B_NOCACHE;
3814
/* inline expand WORKLIST_REMOVE(wk); */
3815
wk->wk_state &= ~ONWORKLIST;
3816
LIST_REMOVE(wk, wk_list);
3817
WORKITEM_FREE(indirdep, D_INDIRDEP);
3824
* Replace up-to-date version with safe version.
3827
MALLOC(indirdep->ir_saveddata, caddr_t, bp->b_bcount,
3828
M_INDIRDEP, M_SOFTDEP_FLAGS);
3830
indirdep->ir_state &= ~ATTACHED;
3831
indirdep->ir_state |= UNDONE;
3832
bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
3833
bcopy(indirdep->ir_savebp->b_data, bp->b_data,
3844
panic("handle_disk_io_initiation: Unexpected type %s",
3845
TYPENAME(wk->wk_type));
3850
PRELE(curproc); /* Allow swapout of kernel stack */
3854
* Called from within the procedure above to deal with unsatisfied
3855
* allocation dependencies in a directory. The buffer must be locked,
3856
* thus, no I/O completion operations can occur while we are
3857
* manipulating its associated dependencies.
3860
initiate_write_filepage(pagedep, bp)
3861
struct pagedep *pagedep;
3868
if (pagedep->pd_state & IOSTARTED) {
3870
* This can only happen if there is a driver that does not
3871
* understand chaining. Here biodone will reissue the call
3872
* to strategy for the incomplete buffers.
3874
printf("initiate_write_filepage: already started\n");
3877
pagedep->pd_state |= IOSTARTED;
3878
for (i = 0; i < DAHASHSZ; i++) {
3879
LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
3880
ep = (struct direct *)
3881
((char *)bp->b_data + dap->da_offset);
3882
if (ep->d_ino != dap->da_newinum)
3883
panic("%s: dir inum %d != new %d",
3884
"initiate_write_filepage",
3885
ep->d_ino, dap->da_newinum);
3886
if (dap->da_state & DIRCHG)
3887
ep->d_ino = dap->da_previous->dm_oldinum;
3890
dap->da_state &= ~ATTACHED;
3891
dap->da_state |= UNDONE;
3897
* Version of initiate_write_inodeblock that handles UFS1 dinodes.
3898
* Note that any bug fixes made to this routine must be done in the
3899
* version found below.
3901
* Called from within the procedure above to deal with unsatisfied
3902
* allocation dependencies in an inodeblock. The buffer must be
3903
* locked, thus, no I/O completion operations can occur while we
3904
* are manipulating its associated dependencies.
3907
initiate_write_inodeblock_ufs1(inodedep, bp)
3908
struct inodedep *inodedep;
3909
struct buf *bp; /* The inode block */
3911
struct allocdirect *adp, *lastadp;
3912
struct ufs1_dinode *dp;
3913
struct ufs1_dinode *sip;
3917
ufs_lbn_t prevlbn = 0;
3921
if (inodedep->id_state & IOSTARTED)
3922
panic("initiate_write_inodeblock_ufs1: already started");
3923
inodedep->id_state |= IOSTARTED;
3924
fs = inodedep->id_fs;
3925
dp = (struct ufs1_dinode *)bp->b_data +
3926
ino_to_fsbo(fs, inodedep->id_ino);
3928
* If the bitmap is not yet written, then the allocated
3929
* inode cannot be written to disk.
3931
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
3932
if (inodedep->id_savedino1 != NULL)
3933
panic("initiate_write_inodeblock_ufs1: I/O underway");
3935
MALLOC(sip, struct ufs1_dinode *,
3936
sizeof(struct ufs1_dinode), M_SAVEDINO, M_SOFTDEP_FLAGS);
3938
inodedep->id_savedino1 = sip;
3939
*inodedep->id_savedino1 = *dp;
3940
bzero((caddr_t)dp, sizeof(struct ufs1_dinode));
3941
dp->di_gen = inodedep->id_savedino1->di_gen;
3945
* If no dependencies, then there is nothing to roll back.
3947
inodedep->id_savedsize = dp->di_size;
3948
inodedep->id_savedextsize = 0;
3949
if (TAILQ_EMPTY(&inodedep->id_inoupdt))
3952
* Set the dependencies to busy.
3954
for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
3955
adp = TAILQ_NEXT(adp, ad_next)) {
3957
if (deplist != 0 && prevlbn >= adp->ad_lbn)
3958
panic("softdep_write_inodeblock: lbn order");
3959
prevlbn = adp->ad_lbn;
3960
if (adp->ad_lbn < NDADDR &&
3961
dp->di_db[adp->ad_lbn] != adp->ad_newblkno)
3962
panic("%s: direct pointer #%jd mismatch %d != %jd",
3963
"softdep_write_inodeblock",
3964
(intmax_t)adp->ad_lbn,
3965
dp->di_db[adp->ad_lbn],
3966
(intmax_t)adp->ad_newblkno);
3967
if (adp->ad_lbn >= NDADDR &&
3968
dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno)
3969
panic("%s: indirect pointer #%jd mismatch %d != %jd",
3970
"softdep_write_inodeblock",
3971
(intmax_t)adp->ad_lbn - NDADDR,
3972
dp->di_ib[adp->ad_lbn - NDADDR],
3973
(intmax_t)adp->ad_newblkno);
3974
deplist |= 1 << adp->ad_lbn;
3975
if ((adp->ad_state & ATTACHED) == 0)
3976
panic("softdep_write_inodeblock: Unknown state 0x%x",
3978
#endif /* INVARIANTS */
3979
adp->ad_state &= ~ATTACHED;
3980
adp->ad_state |= UNDONE;
3983
* The on-disk inode cannot claim to be any larger than the last
3984
* fragment that has been written. Otherwise, the on-disk inode
3985
* might have fragments that were not the last block in the file
3986
* which would corrupt the filesystem.
3988
for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
3989
lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
3990
if (adp->ad_lbn >= NDADDR)
3992
dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
3993
/* keep going until hitting a rollback to a frag */
3994
if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
3996
dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
3997
for (i = adp->ad_lbn + 1; i < NDADDR; i++) {
3999
if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0)
4000
panic("softdep_write_inodeblock: lost dep1");
4001
#endif /* INVARIANTS */
4004
for (i = 0; i < NIADDR; i++) {
4006
if (dp->di_ib[i] != 0 &&
4007
(deplist & ((1 << NDADDR) << i)) == 0)
4008
panic("softdep_write_inodeblock: lost dep2");
4009
#endif /* INVARIANTS */
4015
* If we have zero'ed out the last allocated block of the file,
4016
* roll back the size to the last currently allocated block.
4017
* We know that this last allocated block is a full-sized as
4018
* we already checked for fragments in the loop above.
4020
if (lastadp != NULL &&
4021
dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
4022
for (i = lastadp->ad_lbn; i >= 0; i--)
4023
if (dp->di_db[i] != 0)
4025
dp->di_size = (i + 1) * fs->fs_bsize;
4028
* The only dependencies are for indirect blocks.
4030
* The file size for indirect block additions is not guaranteed.
4031
* Such a guarantee would be non-trivial to achieve. The conventional
4032
* synchronous write implementation also does not make this guarantee.
4033
* Fsck should catch and fix discrepancies. Arguably, the file size
4034
* can be over-estimated without destroying integrity when the file
4035
* moves into the indirect blocks (i.e., is large). If we want to
4036
* postpone fsck, we are stuck with this argument.
4038
for (; adp; adp = TAILQ_NEXT(adp, ad_next))
4039
dp->di_ib[adp->ad_lbn - NDADDR] = 0;
4043
* Version of initiate_write_inodeblock that handles UFS2 dinodes.
4044
* Note that any bug fixes made to this routine must be done in the
4045
* version found above.
4047
* Called from within the procedure above to deal with unsatisfied
4048
* allocation dependencies in an inodeblock. The buffer must be
4049
* locked, thus, no I/O completion operations can occur while we
4050
* are manipulating its associated dependencies.
4053
initiate_write_inodeblock_ufs2(inodedep, bp)
4054
struct inodedep *inodedep;
4055
struct buf *bp; /* The inode block */
4057
struct allocdirect *adp, *lastadp;
4058
struct ufs2_dinode *dp;
4059
struct ufs2_dinode *sip;
4063
ufs_lbn_t prevlbn = 0;
4067
if (inodedep->id_state & IOSTARTED)
4068
panic("initiate_write_inodeblock_ufs2: already started");
4069
inodedep->id_state |= IOSTARTED;
4070
fs = inodedep->id_fs;
4071
dp = (struct ufs2_dinode *)bp->b_data +
4072
ino_to_fsbo(fs, inodedep->id_ino);
4074
* If the bitmap is not yet written, then the allocated
4075
* inode cannot be written to disk.
4077
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
4078
if (inodedep->id_savedino2 != NULL)
4079
panic("initiate_write_inodeblock_ufs2: I/O underway");
4081
MALLOC(sip, struct ufs2_dinode *,
4082
sizeof(struct ufs2_dinode), M_SAVEDINO, M_SOFTDEP_FLAGS);
4084
inodedep->id_savedino2 = sip;
4085
*inodedep->id_savedino2 = *dp;
4086
bzero((caddr_t)dp, sizeof(struct ufs2_dinode));
4087
dp->di_gen = inodedep->id_savedino2->di_gen;
4091
* If no dependencies, then there is nothing to roll back.
4093
inodedep->id_savedsize = dp->di_size;
4094
inodedep->id_savedextsize = dp->di_extsize;
4095
if (TAILQ_EMPTY(&inodedep->id_inoupdt) &&
4096
TAILQ_EMPTY(&inodedep->id_extupdt))
4099
* Set the ext data dependencies to busy.
4101
for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
4102
adp = TAILQ_NEXT(adp, ad_next)) {
4104
if (deplist != 0 && prevlbn >= adp->ad_lbn)
4105
panic("softdep_write_inodeblock: lbn order");
4106
prevlbn = adp->ad_lbn;
4107
if (dp->di_extb[adp->ad_lbn] != adp->ad_newblkno)
4108
panic("%s: direct pointer #%jd mismatch %jd != %jd",
4109
"softdep_write_inodeblock",
4110
(intmax_t)adp->ad_lbn,
4111
(intmax_t)dp->di_extb[adp->ad_lbn],
4112
(intmax_t)adp->ad_newblkno);
4113
deplist |= 1 << adp->ad_lbn;
4114
if ((adp->ad_state & ATTACHED) == 0)
4115
panic("softdep_write_inodeblock: Unknown state 0x%x",
4117
#endif /* INVARIANTS */
4118
adp->ad_state &= ~ATTACHED;
4119
adp->ad_state |= UNDONE;
4122
* The on-disk inode cannot claim to be any larger than the last
4123
* fragment that has been written. Otherwise, the on-disk inode
4124
* might have fragments that were not the last block in the ext
4125
* data which would corrupt the filesystem.
4127
for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
4128
lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
4129
dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno;
4130
/* keep going until hitting a rollback to a frag */
4131
if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
4133
dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
4134
for (i = adp->ad_lbn + 1; i < NXADDR; i++) {
4136
if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0)
4137
panic("softdep_write_inodeblock: lost dep1");
4138
#endif /* INVARIANTS */
4145
* If we have zero'ed out the last allocated block of the ext
4146
* data, roll back the size to the last currently allocated block.
4147
* We know that this last allocated block is a full-sized as
4148
* we already checked for fragments in the loop above.
4150
if (lastadp != NULL &&
4151
dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
4152
for (i = lastadp->ad_lbn; i >= 0; i--)
4153
if (dp->di_extb[i] != 0)
4155
dp->di_extsize = (i + 1) * fs->fs_bsize;
4158
* Set the file data dependencies to busy.
4160
for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
4161
adp = TAILQ_NEXT(adp, ad_next)) {
4163
if (deplist != 0 && prevlbn >= adp->ad_lbn)
4164
panic("softdep_write_inodeblock: lbn order");
4165
prevlbn = adp->ad_lbn;
4166
if (adp->ad_lbn < NDADDR &&
4167
dp->di_db[adp->ad_lbn] != adp->ad_newblkno)
4168
panic("%s: direct pointer #%jd mismatch %jd != %jd",
4169
"softdep_write_inodeblock",
4170
(intmax_t)adp->ad_lbn,
4171
(intmax_t)dp->di_db[adp->ad_lbn],
4172
(intmax_t)adp->ad_newblkno);
4173
if (adp->ad_lbn >= NDADDR &&
4174
dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno)
4175
panic("%s indirect pointer #%jd mismatch %jd != %jd",
4176
"softdep_write_inodeblock:",
4177
(intmax_t)adp->ad_lbn - NDADDR,
4178
(intmax_t)dp->di_ib[adp->ad_lbn - NDADDR],
4179
(intmax_t)adp->ad_newblkno);
4180
deplist |= 1 << adp->ad_lbn;
4181
if ((adp->ad_state & ATTACHED) == 0)
4182
panic("softdep_write_inodeblock: Unknown state 0x%x",
4184
#endif /* INVARIANTS */
4185
adp->ad_state &= ~ATTACHED;
4186
adp->ad_state |= UNDONE;
4189
* The on-disk inode cannot claim to be any larger than the last
4190
* fragment that has been written. Otherwise, the on-disk inode
4191
* might have fragments that were not the last block in the file
4192
* which would corrupt the filesystem.
4194
for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
4195
lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
4196
if (adp->ad_lbn >= NDADDR)
4198
dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
4199
/* keep going until hitting a rollback to a frag */
4200
if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
4202
dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
4203
for (i = adp->ad_lbn + 1; i < NDADDR; i++) {
4205
if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0)
4206
panic("softdep_write_inodeblock: lost dep2");
4207
#endif /* INVARIANTS */
4210
for (i = 0; i < NIADDR; i++) {
4212
if (dp->di_ib[i] != 0 &&
4213
(deplist & ((1 << NDADDR) << i)) == 0)
4214
panic("softdep_write_inodeblock: lost dep3");
4215
#endif /* INVARIANTS */
4221
* If we have zero'ed out the last allocated block of the file,
4222
* roll back the size to the last currently allocated block.
4223
* We know that this last allocated block is a full-sized as
4224
* we already checked for fragments in the loop above.
4226
if (lastadp != NULL &&
4227
dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
4228
for (i = lastadp->ad_lbn; i >= 0; i--)
4229
if (dp->di_db[i] != 0)
4231
dp->di_size = (i + 1) * fs->fs_bsize;
4234
* The only dependencies are for indirect blocks.
4236
* The file size for indirect block additions is not guaranteed.
4237
* Such a guarantee would be non-trivial to achieve. The conventional
4238
* synchronous write implementation also does not make this guarantee.
4239
* Fsck should catch and fix discrepancies. Arguably, the file size
4240
* can be over-estimated without destroying integrity when the file
4241
* moves into the indirect blocks (i.e., is large). If we want to
4242
* postpone fsck, we are stuck with this argument.
4244
for (; adp; adp = TAILQ_NEXT(adp, ad_next))
4245
dp->di_ib[adp->ad_lbn - NDADDR] = 0;
4249
* This routine is called during the completion interrupt
4250
* service routine for a disk write (from the procedure called
4251
* by the device driver to inform the filesystem caches of
4252
* a request completion). It should be called early in this
4253
* procedure, before the block is made available to other
4254
* processes or other routines are called.
4257
softdep_disk_write_complete(bp)
4258
struct buf *bp; /* describes the completed disk write */
4260
struct worklist *wk;
4261
struct worklist *owk;
4262
struct workhead reattach;
4263
struct newblk *newblk;
4264
struct allocindir *aip;
4265
struct allocdirect *adp;
4266
struct indirdep *indirdep;
4267
struct inodedep *inodedep;
4268
struct bmsafemap *bmsafemap;
4271
* If an error occurred while doing the write, then the data
4272
* has not hit the disk and the dependencies cannot be unrolled.
4274
if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0)
4276
LIST_INIT(&reattach);
4278
* This lock must not be released anywhere in this code segment.
4282
while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
4283
WORKLIST_REMOVE(wk);
4285
panic("duplicate worklist: %p\n", wk);
4287
switch (wk->wk_type) {
4290
if (handle_written_filepage(WK_PAGEDEP(wk), bp))
4291
WORKLIST_INSERT(&reattach, wk);
4295
if (handle_written_inodeblock(WK_INODEDEP(wk), bp))
4296
WORKLIST_INSERT(&reattach, wk);
4300
bmsafemap = WK_BMSAFEMAP(wk);
4301
while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) {
4302
newblk->nb_state |= DEPCOMPLETE;
4303
newblk->nb_bmsafemap = NULL;
4304
LIST_REMOVE(newblk, nb_deps);
4307
LIST_FIRST(&bmsafemap->sm_allocdirecthd))) {
4308
adp->ad_state |= DEPCOMPLETE;
4310
LIST_REMOVE(adp, ad_deps);
4311
handle_allocdirect_partdone(adp);
4314
LIST_FIRST(&bmsafemap->sm_allocindirhd))) {
4315
aip->ai_state |= DEPCOMPLETE;
4317
LIST_REMOVE(aip, ai_deps);
4318
handle_allocindir_partdone(aip);
4321
LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) {
4322
inodedep->id_state |= DEPCOMPLETE;
4323
LIST_REMOVE(inodedep, id_deps);
4324
inodedep->id_buf = NULL;
4326
WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
4330
handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY);
4334
adp = WK_ALLOCDIRECT(wk);
4335
adp->ad_state |= COMPLETE;
4336
handle_allocdirect_partdone(adp);
4340
aip = WK_ALLOCINDIR(wk);
4341
aip->ai_state |= COMPLETE;
4342
handle_allocindir_partdone(aip);
4346
indirdep = WK_INDIRDEP(wk);
4347
if (indirdep->ir_state & GOINGAWAY)
4348
panic("disk_write_complete: indirdep gone");
4349
bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
4350
FREE(indirdep->ir_saveddata, M_INDIRDEP);
4351
indirdep->ir_saveddata = 0;
4352
indirdep->ir_state &= ~UNDONE;
4353
indirdep->ir_state |= ATTACHED;
4354
while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) {
4355
handle_allocindir_partdone(aip);
4356
if (aip == LIST_FIRST(&indirdep->ir_donehd))
4357
panic("disk_write_complete: not gone");
4359
WORKLIST_INSERT(&reattach, wk);
4360
if ((bp->b_flags & B_DELWRI) == 0)
4361
stat_indir_blk_ptrs++;
4366
panic("handle_disk_write_complete: Unknown type %s",
4367
TYPENAME(wk->wk_type));
4372
* Reattach any requests that must be redone.
4374
while ((wk = LIST_FIRST(&reattach)) != NULL) {
4375
WORKLIST_REMOVE(wk);
4376
WORKLIST_INSERT(&bp->b_dep, wk);
4382
* Called from within softdep_disk_write_complete above. Note that
4383
* this routine is always called from interrupt level with further
4384
* splbio interrupts blocked.
4387
handle_allocdirect_partdone(adp)
4388
struct allocdirect *adp; /* the completed allocdirect */
4390
struct allocdirectlst *listhead;
4391
struct allocdirect *listadp;
4392
struct inodedep *inodedep;
4395
if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
4397
if (adp->ad_buf != NULL)
4398
panic("handle_allocdirect_partdone: dangling dep");
4400
* The on-disk inode cannot claim to be any larger than the last
4401
* fragment that has been written. Otherwise, the on-disk inode
4402
* might have fragments that were not the last block in the file
4403
* which would corrupt the filesystem. Thus, we cannot free any
4404
* allocdirects after one whose ad_oldblkno claims a fragment as
4405
* these blocks must be rolled back to zero before writing the inode.
4406
* We check the currently active set of allocdirects in id_inoupdt
4407
* or id_extupdt as appropriate.
4409
inodedep = adp->ad_inodedep;
4410
bsize = inodedep->id_fs->fs_bsize;
4411
if (adp->ad_state & EXTDATA)
4412
listhead = &inodedep->id_extupdt;
4414
listhead = &inodedep->id_inoupdt;
4415
TAILQ_FOREACH(listadp, listhead, ad_next) {
4416
/* found our block */
4419
/* continue if ad_oldlbn is not a fragment */
4420
if (listadp->ad_oldsize == 0 ||
4421
listadp->ad_oldsize == bsize)
4423
/* hit a fragment */
4427
* If we have reached the end of the current list without
4428
* finding the just finished dependency, then it must be
4429
* on the future dependency list. Future dependencies cannot
4430
* be freed until they are moved to the current list.
4432
if (listadp == NULL) {
4434
if (adp->ad_state & EXTDATA)
4435
listhead = &inodedep->id_newextupdt;
4437
listhead = &inodedep->id_newinoupdt;
4438
TAILQ_FOREACH(listadp, listhead, ad_next)
4439
/* found our block */
4442
if (listadp == NULL)
4443
panic("handle_allocdirect_partdone: lost dep");
4448
* If we have found the just finished dependency, then free
4449
* it along with anything that follows it that is complete.
4450
* If the inode still has a bitmap dependency, then it has
4451
* never been written to disk, hence the on-disk inode cannot
4452
* reference the old fragment so we can free it without delay.
4454
delay = (inodedep->id_state & DEPCOMPLETE);
4455
for (; adp; adp = listadp) {
4456
listadp = TAILQ_NEXT(adp, ad_next);
4457
if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
4459
free_allocdirect(listhead, adp, delay);
4464
* Called from within softdep_disk_write_complete above. Note that
4465
* this routine is always called from interrupt level with further
4466
* splbio interrupts blocked.
4469
handle_allocindir_partdone(aip)
4470
struct allocindir *aip; /* the completed allocindir */
4472
struct indirdep *indirdep;
4474
if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE)
4476
if (aip->ai_buf != NULL)
4477
panic("handle_allocindir_partdone: dangling dependency");
4478
indirdep = aip->ai_indirdep;
4479
if (indirdep->ir_state & UNDONE) {
4480
LIST_REMOVE(aip, ai_next);
4481
LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next);
4484
if (indirdep->ir_state & UFS1FMT)
4485
((ufs1_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
4488
((ufs2_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
4490
LIST_REMOVE(aip, ai_next);
4491
if (aip->ai_freefrag != NULL)
4492
add_to_worklist(&aip->ai_freefrag->ff_list);
4493
WORKITEM_FREE(aip, D_ALLOCINDIR);
4497
* Called from within softdep_disk_write_complete above to restore
4498
* in-memory inode block contents to their most up-to-date state. Note
4499
* that this routine is always called from interrupt level with further
4500
* splbio interrupts blocked.
4503
handle_written_inodeblock(inodedep, bp)
4504
struct inodedep *inodedep;
4505
struct buf *bp; /* buffer containing the inode block */
4507
struct worklist *wk, *filefree;
4508
struct allocdirect *adp, *nextadp;
4509
struct ufs1_dinode *dp1 = NULL;
4510
struct ufs2_dinode *dp2 = NULL;
4511
int hadchanges, fstype;
4513
if ((inodedep->id_state & IOSTARTED) == 0)
4514
panic("handle_written_inodeblock: not started");
4515
inodedep->id_state &= ~IOSTARTED;
4516
if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) {
4518
dp1 = (struct ufs1_dinode *)bp->b_data +
4519
ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
4522
dp2 = (struct ufs2_dinode *)bp->b_data +
4523
ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
4526
* If we had to rollback the inode allocation because of
4527
* bitmaps being incomplete, then simply restore it.
4528
* Keep the block dirty so that it will not be reclaimed until
4529
* all associated dependencies have been cleared and the
4530
* corresponding updates written to disk.
4532
if (inodedep->id_savedino1 != NULL) {
4534
*dp1 = *inodedep->id_savedino1;
4536
*dp2 = *inodedep->id_savedino2;
4537
FREE(inodedep->id_savedino1, M_SAVEDINO);
4538
inodedep->id_savedino1 = NULL;
4539
if ((bp->b_flags & B_DELWRI) == 0)
4540
stat_inode_bitmap++;
4544
inodedep->id_state |= COMPLETE;
4546
* Roll forward anything that had to be rolled back before
4547
* the inode could be updated.
4550
for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) {
4551
nextadp = TAILQ_NEXT(adp, ad_next);
4552
if (adp->ad_state & ATTACHED)
4553
panic("handle_written_inodeblock: new entry");
4554
if (fstype == UFS1) {
4555
if (adp->ad_lbn < NDADDR) {
4556
if (dp1->di_db[adp->ad_lbn]!=adp->ad_oldblkno)
4557
panic("%s %s #%jd mismatch %d != %jd",
4558
"handle_written_inodeblock:",
4560
(intmax_t)adp->ad_lbn,
4561
dp1->di_db[adp->ad_lbn],
4562
(intmax_t)adp->ad_oldblkno);
4563
dp1->di_db[adp->ad_lbn] = adp->ad_newblkno;
4565
if (dp1->di_ib[adp->ad_lbn - NDADDR] != 0)
4566
panic("%s: %s #%jd allocated as %d",
4567
"handle_written_inodeblock",
4569
(intmax_t)adp->ad_lbn - NDADDR,
4570
dp1->di_ib[adp->ad_lbn - NDADDR]);
4571
dp1->di_ib[adp->ad_lbn - NDADDR] =
4575
if (adp->ad_lbn < NDADDR) {
4576
if (dp2->di_db[adp->ad_lbn]!=adp->ad_oldblkno)
4577
panic("%s: %s #%jd %s %jd != %jd",
4578
"handle_written_inodeblock",
4580
(intmax_t)adp->ad_lbn, "mismatch",
4581
(intmax_t)dp2->di_db[adp->ad_lbn],
4582
(intmax_t)adp->ad_oldblkno);
4583
dp2->di_db[adp->ad_lbn] = adp->ad_newblkno;
4585
if (dp2->di_ib[adp->ad_lbn - NDADDR] != 0)
4586
panic("%s: %s #%jd allocated as %jd",
4587
"handle_written_inodeblock",
4589
(intmax_t)adp->ad_lbn - NDADDR,
4591
dp2->di_ib[adp->ad_lbn - NDADDR]);
4592
dp2->di_ib[adp->ad_lbn - NDADDR] =
4596
adp->ad_state &= ~UNDONE;
4597
adp->ad_state |= ATTACHED;
4600
for (adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = nextadp) {
4601
nextadp = TAILQ_NEXT(adp, ad_next);
4602
if (adp->ad_state & ATTACHED)
4603
panic("handle_written_inodeblock: new entry");
4604
if (dp2->di_extb[adp->ad_lbn] != adp->ad_oldblkno)
4605
panic("%s: direct pointers #%jd %s %jd != %jd",
4606
"handle_written_inodeblock",
4607
(intmax_t)adp->ad_lbn, "mismatch",
4608
(intmax_t)dp2->di_extb[adp->ad_lbn],
4609
(intmax_t)adp->ad_oldblkno);
4610
dp2->di_extb[adp->ad_lbn] = adp->ad_newblkno;
4611
adp->ad_state &= ~UNDONE;
4612
adp->ad_state |= ATTACHED;
4615
if (hadchanges && (bp->b_flags & B_DELWRI) == 0)
4616
stat_direct_blk_ptrs++;
4618
* Reset the file size to its most up-to-date value.
4620
if (inodedep->id_savedsize == -1 || inodedep->id_savedextsize == -1)
4621
panic("handle_written_inodeblock: bad size");
4622
if (fstype == UFS1) {
4623
if (dp1->di_size != inodedep->id_savedsize) {
4624
dp1->di_size = inodedep->id_savedsize;
4628
if (dp2->di_size != inodedep->id_savedsize) {
4629
dp2->di_size = inodedep->id_savedsize;
4632
if (dp2->di_extsize != inodedep->id_savedextsize) {
4633
dp2->di_extsize = inodedep->id_savedextsize;
4637
inodedep->id_savedsize = -1;
4638
inodedep->id_savedextsize = -1;
4640
* If there were any rollbacks in the inode block, then it must be
4641
* marked dirty so that its will eventually get written back in
4647
* Process any allocdirects that completed during the update.
4649
if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
4650
handle_allocdirect_partdone(adp);
4651
if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
4652
handle_allocdirect_partdone(adp);
4654
* Process deallocations that were held pending until the
4655
* inode had been written to disk. Freeing of the inode
4656
* is delayed until after all blocks have been freed to
4657
* avoid creation of new <vfsid, inum, lbn> triples
4658
* before the old ones have been deleted.
4661
while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) {
4662
WORKLIST_REMOVE(wk);
4663
switch (wk->wk_type) {
4667
* We defer adding filefree to the worklist until
4668
* all other additions have been made to ensure
4669
* that it will be done after all the old blocks
4672
if (filefree != NULL)
4673
panic("handle_written_inodeblock: filefree");
4678
handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT);
4682
diradd_inode_written(WK_DIRADD(wk), inodedep);
4686
wk->wk_state |= COMPLETE;
4687
if ((wk->wk_state & ALLCOMPLETE) != ALLCOMPLETE)
4689
/* -- fall through -- */
4692
add_to_worklist(wk);
4696
free_newdirblk(WK_NEWDIRBLK(wk));
4700
panic("handle_written_inodeblock: Unknown type %s",
4701
TYPENAME(wk->wk_type));
4705
if (filefree != NULL) {
4706
if (free_inodedep(inodedep) == 0)
4707
panic("handle_written_inodeblock: live inodedep");
4708
add_to_worklist(filefree);
4713
* If no outstanding dependencies, free it.
4715
if (free_inodedep(inodedep) ||
4716
(TAILQ_FIRST(&inodedep->id_inoupdt) == 0 &&
4717
TAILQ_FIRST(&inodedep->id_extupdt) == 0))
4719
return (hadchanges);
4723
* Process a diradd entry after its dependent inode has been written.
4724
* This routine must be called with splbio interrupts blocked.
4727
diradd_inode_written(dap, inodedep)
4729
struct inodedep *inodedep;
4731
struct pagedep *pagedep;
4733
dap->da_state |= COMPLETE;
4734
if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
4735
if (dap->da_state & DIRCHG)
4736
pagedep = dap->da_previous->dm_pagedep;
4738
pagedep = dap->da_pagedep;
4739
LIST_REMOVE(dap, da_pdlist);
4740
LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
4742
WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
4746
* Handle the completion of a mkdir dependency.
4749
handle_written_mkdir(mkdir, type)
4750
struct mkdir *mkdir;
4754
struct pagedep *pagedep;
4756
if (mkdir->md_state != type)
4757
panic("handle_written_mkdir: bad type");
4758
dap = mkdir->md_diradd;
4759
dap->da_state &= ~type;
4760
if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0)
4761
dap->da_state |= DEPCOMPLETE;
4762
if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
4763
if (dap->da_state & DIRCHG)
4764
pagedep = dap->da_previous->dm_pagedep;
4766
pagedep = dap->da_pagedep;
4767
LIST_REMOVE(dap, da_pdlist);
4768
LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
4770
LIST_REMOVE(mkdir, md_mkdirs);
4771
WORKITEM_FREE(mkdir, D_MKDIR);
4775
* Called from within softdep_disk_write_complete above.
4776
* A write operation was just completed. Removed inodes can
4777
* now be freed and associated block pointers may be committed.
4778
* Note that this routine is always called from interrupt level
4779
* with further splbio interrupts blocked.
4782
handle_written_filepage(pagedep, bp)
4783
struct pagedep *pagedep;
4784
struct buf *bp; /* buffer containing the written page */
4786
struct dirrem *dirrem;
4787
struct diradd *dap, *nextdap;
4791
if ((pagedep->pd_state & IOSTARTED) == 0)
4792
panic("handle_written_filepage: not started");
4793
pagedep->pd_state &= ~IOSTARTED;
4795
* Process any directory removals that have been committed.
4797
while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) {
4798
LIST_REMOVE(dirrem, dm_next);
4799
dirrem->dm_dirinum = pagedep->pd_ino;
4800
add_to_worklist(&dirrem->dm_list);
4803
* Free any directory additions that have been committed.
4804
* If it is a newly allocated block, we have to wait until
4805
* the on-disk directory inode claims the new block.
4807
if ((pagedep->pd_state & NEWBLOCK) == 0)
4808
while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
4811
* Uncommitted directory entries must be restored.
4813
for (chgs = 0, i = 0; i < DAHASHSZ; i++) {
4814
for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap;
4816
nextdap = LIST_NEXT(dap, da_pdlist);
4817
if (dap->da_state & ATTACHED)
4818
panic("handle_written_filepage: attached");
4819
ep = (struct direct *)
4820
((char *)bp->b_data + dap->da_offset);
4821
ep->d_ino = dap->da_newinum;
4822
dap->da_state &= ~UNDONE;
4823
dap->da_state |= ATTACHED;
4826
* If the inode referenced by the directory has
4827
* been written out, then the dependency can be
4828
* moved to the pending list.
4830
if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
4831
LIST_REMOVE(dap, da_pdlist);
4832
LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
4838
* If there were any rollbacks in the directory, then it must be
4839
* marked dirty so that its will eventually get written back in
4843
if ((bp->b_flags & B_DELWRI) == 0)
4849
* If we are not waiting for a new directory block to be
4850
* claimed by its inode, then the pagedep will be freed.
4851
* Otherwise it will remain to track any new entries on
4852
* the page in case they are fsync'ed.
4854
if ((pagedep->pd_state & NEWBLOCK) == 0) {
4855
LIST_REMOVE(pagedep, pd_hash);
4856
WORKITEM_FREE(pagedep, D_PAGEDEP);
4862
* Writing back in-core inode structures.
4864
* The filesystem only accesses an inode's contents when it occupies an
4865
* "in-core" inode structure. These "in-core" structures are separate from
4866
* the page frames used to cache inode blocks. Only the latter are
4867
* transferred to/from the disk. So, when the updated contents of the
4868
* "in-core" inode structure are copied to the corresponding in-memory inode
4869
* block, the dependencies are also transferred. The following procedure is
4870
* called when copying a dirty "in-core" inode to a cached inode block.
4874
* Called when an inode is loaded from disk. If the effective link count
4875
* differed from the actual link count when it was last flushed, then we
4876
* need to ensure that the correct effective link count is put back.
4879
softdep_load_inodeblock(ip)
4880
struct inode *ip; /* the "in_core" copy of the inode */
4882
struct inodedep *inodedep;
4885
* Check for alternate nlink count.
4887
ip->i_effnlink = ip->i_nlink;
4889
if (inodedep_lookup(UFSTOVFS(ip->i_ump),
4890
ip->i_number, 0, &inodedep) == 0) {
4894
ip->i_effnlink -= inodedep->id_nlinkdelta;
4895
if (inodedep->id_state & SPACECOUNTED)
4896
ip->i_flag |= IN_SPACECOUNTED;
4901
* This routine is called just before the "in-core" inode
4902
* information is to be copied to the in-memory inode block.
4903
* Recall that an inode block contains several inodes. If
4904
* the force flag is set, then the dependencies will be
4905
* cleared so that the update can always be made. Note that
4906
* the buffer is locked when this routine is called, so we
4907
* will never be in the middle of writing the inode block
4911
softdep_update_inodeblock(ip, bp, waitfor)
4912
struct inode *ip; /* the "in_core" copy of the inode */
4913
struct buf *bp; /* the buffer containing the inode block */
4914
int waitfor; /* nonzero => update must be allowed */
4916
struct inodedep *inodedep;
4917
struct worklist *wk;
4923
* If the effective link count is not equal to the actual link
4924
* count, then we must track the difference in an inodedep while
4925
* the inode is (potentially) tossed out of the cache. Otherwise,
4926
* if there is no existing inodedep, then there are no dependencies
4929
mp = UFSTOVFS(ip->i_ump);
4931
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) {
4933
if (ip->i_effnlink != ip->i_nlink)
4934
panic("softdep_update_inodeblock: bad link count");
4937
if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink)
4938
panic("softdep_update_inodeblock: bad delta");
4940
* Changes have been initiated. Anything depending on these
4941
* changes cannot occur until this inode has been written.
4943
inodedep->id_state &= ~COMPLETE;
4944
if ((inodedep->id_state & ONWORKLIST) == 0)
4945
WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list);
4947
* Any new dependencies associated with the incore inode must
4948
* now be moved to the list associated with the buffer holding
4949
* the in-memory copy of the inode. Once merged process any
4950
* allocdirects that are completed by the merger.
4952
merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt);
4953
if (!TAILQ_EMPTY(&inodedep->id_inoupdt))
4954
handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt));
4955
merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt);
4956
if (!TAILQ_EMPTY(&inodedep->id_extupdt))
4957
handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt));
4959
* Now that the inode has been pushed into the buffer, the
4960
* operations dependent on the inode being written to disk
4961
* can be moved to the id_bufwait so that they will be
4962
* processed when the buffer I/O completes.
4964
while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) {
4965
WORKLIST_REMOVE(wk);
4966
WORKLIST_INSERT(&inodedep->id_bufwait, wk);
4969
* Newly allocated inodes cannot be written until the bitmap
4970
* that allocates them have been written (indicated by
4971
* DEPCOMPLETE being set in id_state). If we are doing a
4972
* forced sync (e.g., an fsync on a file), we force the bitmap
4973
* to be written so that the update can be done.
4980
if ((inodedep->id_state & DEPCOMPLETE) != 0) {
4984
ibp = inodedep->id_buf;
4985
ibp = getdirtybuf(ibp, &lk, MNT_WAIT);
4988
* If ibp came back as NULL, the dependency could have been
4989
* freed while we slept. Look it up again, and check to see
4990
* that it has completed.
4992
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
4998
if ((error = bwrite(ibp)) != 0)
4999
softdep_error("softdep_update_inodeblock: bwrite", error);
5003
* Merge the a new inode dependency list (such as id_newinoupdt) into an
5004
* old inode dependency list (such as id_inoupdt). This routine must be
5005
* called with splbio interrupts blocked.
5008
merge_inode_lists(newlisthead, oldlisthead)
5009
struct allocdirectlst *newlisthead;
5010
struct allocdirectlst *oldlisthead;
5012
struct allocdirect *listadp, *newadp;
5014
newadp = TAILQ_FIRST(newlisthead);
5015
for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) {
5016
if (listadp->ad_lbn < newadp->ad_lbn) {
5017
listadp = TAILQ_NEXT(listadp, ad_next);
5020
TAILQ_REMOVE(newlisthead, newadp, ad_next);
5021
TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
5022
if (listadp->ad_lbn == newadp->ad_lbn) {
5023
allocdirect_merge(oldlisthead, newadp,
5027
newadp = TAILQ_FIRST(newlisthead);
5029
while ((newadp = TAILQ_FIRST(newlisthead)) != NULL) {
5030
TAILQ_REMOVE(newlisthead, newadp, ad_next);
5031
TAILQ_INSERT_TAIL(oldlisthead, newadp, ad_next);
5036
* If we are doing an fsync, then we must ensure that any directory
5037
* entries for the inode have been written after the inode gets to disk.
5041
struct vnode *vp; /* the "in_core" copy of the inode */
5043
struct inodedep *inodedep;
5044
struct pagedep *pagedep;
5045
struct worklist *wk;
5052
struct thread *td = curthread;
5053
int error, flushparent, pagedep_new_block;
5061
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) {
5065
if (!LIST_EMPTY(&inodedep->id_inowait) ||
5066
!LIST_EMPTY(&inodedep->id_bufwait) ||
5067
!TAILQ_EMPTY(&inodedep->id_extupdt) ||
5068
!TAILQ_EMPTY(&inodedep->id_newextupdt) ||
5069
!TAILQ_EMPTY(&inodedep->id_inoupdt) ||
5070
!TAILQ_EMPTY(&inodedep->id_newinoupdt))
5071
panic("softdep_fsync: pending ops");
5072
for (error = 0, flushparent = 0; ; ) {
5073
if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
5075
if (wk->wk_type != D_DIRADD)
5076
panic("softdep_fsync: Unexpected type %s",
5077
TYPENAME(wk->wk_type));
5078
dap = WK_DIRADD(wk);
5080
* Flush our parent if this directory entry has a MKDIR_PARENT
5081
* dependency or is contained in a newly allocated block.
5083
if (dap->da_state & DIRCHG)
5084
pagedep = dap->da_previous->dm_pagedep;
5086
pagedep = dap->da_pagedep;
5087
parentino = pagedep->pd_ino;
5088
lbn = pagedep->pd_lbn;
5089
if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE)
5090
panic("softdep_fsync: dirty");
5091
if ((dap->da_state & MKDIR_PARENT) ||
5092
(pagedep->pd_state & NEWBLOCK))
5097
* If we are being fsync'ed as part of vgone'ing this vnode,
5098
* then we will not be able to release and recover the
5099
* vnode below, so we just have to give up on writing its
5100
* directory entry out. It will eventually be written, just
5101
* not now, but then the user was not asking to have it
5102
* written, so we are not breaking any promises.
5104
if (vp->v_iflag & VI_DOOMED)
5107
* We prevent deadlock by always fetching inodes from the
5108
* root, moving down the directory tree. Thus, when fetching
5109
* our parent directory, we first try to get the lock. If
5110
* that fails, we must unlock ourselves before requesting
5111
* the lock on our parent. See the comment in ufs_lookup
5112
* for details on possible races.
5115
if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp,
5117
VOP_UNLOCK(vp, 0, td);
5118
error = ffs_vgetf(mp, parentino, LK_EXCLUSIVE,
5119
&pvp, FFSV_FORCEINSMQ);
5120
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
5125
* All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
5126
* that are contained in direct blocks will be resolved by
5127
* doing a ffs_update. Pagedeps contained in indirect blocks
5128
* may require a complete sync'ing of the directory. So, we
5129
* try the cheap and fast ffs_update first, and if that fails,
5130
* then we do the slower ffs_syncvnode of the directory.
5135
if ((error = ffs_update(pvp, 1)) != 0) {
5141
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) {
5142
if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) != NULL) {
5143
if (wk->wk_type != D_DIRADD)
5144
panic("softdep_fsync: Unexpected type %s",
5145
TYPENAME(wk->wk_type));
5146
dap = WK_DIRADD(wk);
5147
if (dap->da_state & DIRCHG)
5148
pagedep = dap->da_previous->dm_pagedep;
5150
pagedep = dap->da_pagedep;
5151
pagedep_new_block = pagedep->pd_state & NEWBLOCK;
5154
if (pagedep_new_block &&
5155
(error = ffs_syncvnode(pvp, MNT_WAIT))) {
5165
* Flush directory page containing the inode's name.
5167
error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), td->td_ucred,
5177
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0)
5185
* Flush all the dirty bitmaps associated with the block device
5186
* before flushing the rest of the dirty blocks so as to reduce
5187
* the number of dependencies that will have to be rolled back.
5190
softdep_fsync_mountdev(vp)
5193
struct buf *bp, *nbp;
5194
struct worklist *wk;
5196
if (!vn_isdisk(vp, NULL))
5197
panic("softdep_fsync_mountdev: vnode not a disk");
5201
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
5203
* If it is already scheduled, skip to the next buffer.
5205
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
5208
if ((bp->b_flags & B_DELWRI) == 0)
5209
panic("softdep_fsync_mountdev: not dirty");
5211
* We are only interested in bitmaps with outstanding
5214
if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
5215
wk->wk_type != D_BMSAFEMAP ||
5216
(bp->b_vflags & BV_BKGRDINPROG)) {
5232
* This routine is called when we are trying to synchronously flush a
5233
* file. This routine must eliminate any filesystem metadata dependencies
5234
* so that the syncing routine can succeed by pushing the dirty blocks
5235
* associated with the file. If any I/O errors occur, they are returned.
5238
softdep_sync_metadata(struct vnode *vp)
5240
struct pagedep *pagedep;
5241
struct allocdirect *adp;
5242
struct allocindir *aip;
5243
struct buf *bp, *nbp;
5244
struct worklist *wk;
5245
int i, error, waitfor;
5247
if (!DOINGSOFTDEP(vp))
5250
* Ensure that any direct block dependencies have been cleared.
5253
if ((error = flush_inodedep_deps(vp->v_mount, VTOI(vp)->i_number))) {
5259
* For most files, the only metadata dependencies are the
5260
* cylinder group maps that allocate their inode or blocks.
5261
* The block allocation dependencies can be found by traversing
5262
* the dependency lists for any buffers that remain on their
5263
* dirty buffer list. The inode allocation dependency will
5264
* be resolved when the inode is updated with MNT_WAIT.
5265
* This work is done in two passes. The first pass grabs most
5266
* of the buffers and begins asynchronously writing them. The
5267
* only way to wait for these asynchronous writes is to sleep
5268
* on the filesystem vnode which may stay busy for a long time
5269
* if the filesystem is active. So, instead, we make a second
5270
* pass over the dependencies blocking on each write. In the
5271
* usual case we will be blocking against a write that we
5272
* initiated, so when it is done the dependency will have been
5273
* resolved. Thus the second pass is expected to end quickly.
5275
waitfor = MNT_NOWAIT;
5279
* We must wait for any I/O in progress to finish so that
5280
* all potential buffers on the dirty list will be visible.
5284
while ((bp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd)) != NULL) {
5285
bp = getdirtybuf(bp, VI_MTX(vp), MNT_WAIT);
5293
/* While syncing snapshots, we must allow recursive lookups */
5294
bp->b_lock.lk_flags |= LK_CANRECURSE;
5297
* As we hold the buffer locked, none of its dependencies
5300
LIST_FOREACH(wk, &bp->b_dep, wk_list) {
5301
switch (wk->wk_type) {
5304
adp = WK_ALLOCDIRECT(wk);
5305
if (adp->ad_state & DEPCOMPLETE)
5308
nbp = getdirtybuf(nbp, &lk, waitfor);
5312
if (waitfor == MNT_NOWAIT) {
5314
} else if ((error = bwrite(nbp)) != 0) {
5321
aip = WK_ALLOCINDIR(wk);
5322
if (aip->ai_state & DEPCOMPLETE)
5325
nbp = getdirtybuf(nbp, &lk, waitfor);
5329
if (waitfor == MNT_NOWAIT) {
5331
} else if ((error = bwrite(nbp)) != 0) {
5340
LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
5341
if (aip->ai_state & DEPCOMPLETE)
5344
nbp = getdirtybuf(nbp, &lk, MNT_WAIT);
5348
if ((error = bwrite(nbp)) != 0) {
5357
if ((error = flush_inodedep_deps(wk->wk_mp,
5358
WK_INODEDEP(wk)->id_ino)) != 0) {
5366
* We are trying to sync a directory that may
5367
* have dependencies on both its own metadata
5368
* and/or dependencies on the inodes of any
5369
* recently allocated files. We walk its diradd
5370
* lists pushing out the associated inode.
5372
pagedep = WK_PAGEDEP(wk);
5373
for (i = 0; i < DAHASHSZ; i++) {
5374
if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0)
5377
flush_pagedep_deps(vp, wk->wk_mp,
5378
&pagedep->pd_diraddhd[i]))) {
5387
* This case should never happen if the vnode has
5388
* been properly sync'ed. However, if this function
5389
* is used at a place where the vnode has not yet
5390
* been sync'ed, this dependency can show up. So,
5391
* rather than panic, just flush it.
5393
nbp = WK_MKDIR(wk)->md_buf;
5394
nbp = getdirtybuf(nbp, &lk, waitfor);
5398
if (waitfor == MNT_NOWAIT) {
5400
} else if ((error = bwrite(nbp)) != 0) {
5408
* This case should never happen if the vnode has
5409
* been properly sync'ed. However, if this function
5410
* is used at a place where the vnode has not yet
5411
* been sync'ed, this dependency can show up. So,
5412
* rather than panic, just flush it.
5414
nbp = WK_BMSAFEMAP(wk)->sm_buf;
5415
nbp = getdirtybuf(nbp, &lk, waitfor);
5419
if (waitfor == MNT_NOWAIT) {
5421
} else if ((error = bwrite(nbp)) != 0) {
5428
panic("softdep_sync_metadata: Unknown type %s",
5429
TYPENAME(wk->wk_type));
5433
/* We reach here only in error and unlocked */
5435
panic("softdep_sync_metadata: zero error");
5436
bp->b_lock.lk_flags &= ~LK_CANRECURSE;
5442
while ((nbp = TAILQ_NEXT(bp, b_bobufs)) != NULL) {
5443
nbp = getdirtybuf(nbp, VI_MTX(vp), MNT_WAIT);
5448
bp->b_lock.lk_flags &= ~LK_CANRECURSE;
5455
* The brief unlock is to allow any pent up dependency
5456
* processing to be done. Then proceed with the second pass.
5458
if (waitfor == MNT_NOWAIT) {
5464
* If we have managed to get rid of all the dirty buffers,
5465
* then we are done. For certain directories and block
5466
* devices, we may need to do further work.
5468
* We must wait for any I/O in progress to finish so that
5469
* all potential buffers on the dirty list will be visible.
5478
* Flush the dependencies associated with an inodedep.
5479
* Called with splbio blocked.
5482
flush_inodedep_deps(mp, ino)
5486
struct inodedep *inodedep;
5490
* This work is done in two passes. The first pass grabs most
5491
* of the buffers and begins asynchronously writing them. The
5492
* only way to wait for these asynchronous writes is to sleep
5493
* on the filesystem vnode which may stay busy for a long time
5494
* if the filesystem is active. So, instead, we make a second
5495
* pass over the dependencies blocking on each write. In the
5496
* usual case we will be blocking against a write that we
5497
* initiated, so when it is done the dependency will have been
5498
* resolved. Thus the second pass is expected to end quickly.
5499
* We give a brief window at the top of the loop to allow
5500
* any pending I/O to complete.
5502
for (error = 0, waitfor = MNT_NOWAIT; ; ) {
5507
if (inodedep_lookup(mp, ino, 0, &inodedep) == 0)
5509
if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) ||
5510
flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) ||
5511
flush_deplist(&inodedep->id_extupdt, waitfor, &error) ||
5512
flush_deplist(&inodedep->id_newextupdt, waitfor, &error))
5515
* If pass2, we are done, otherwise do pass 2.
5517
if (waitfor == MNT_WAIT)
5522
* Try freeing inodedep in case all dependencies have been removed.
5524
if (inodedep_lookup(mp, ino, 0, &inodedep) != 0)
5525
(void) free_inodedep(inodedep);
5530
* Flush an inode dependency list.
5531
* Called with splbio blocked.
5534
flush_deplist(listhead, waitfor, errorp)
5535
struct allocdirectlst *listhead;
5539
struct allocdirect *adp;
5542
mtx_assert(&lk, MA_OWNED);
5543
TAILQ_FOREACH(adp, listhead, ad_next) {
5544
if (adp->ad_state & DEPCOMPLETE)
5547
bp = getdirtybuf(bp, &lk, waitfor);
5549
if (waitfor == MNT_NOWAIT)
5554
if (waitfor == MNT_NOWAIT) {
5556
} else if ((*errorp = bwrite(bp)) != 0) {
5567
* Eliminate a pagedep dependency by flushing out all its diradd dependencies.
5568
* Called with splbio blocked.
5571
flush_pagedep_deps(pvp, mp, diraddhdp)
5574
struct diraddhd *diraddhdp;
5576
struct inodedep *inodedep;
5577
struct ufsmount *ump;
5583
struct worklist *wk;
5586
while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
5588
* Flush ourselves if this directory entry
5589
* has a MKDIR_PARENT dependency.
5591
if (dap->da_state & MKDIR_PARENT) {
5593
if ((error = ffs_update(pvp, 1)) != 0)
5597
* If that cleared dependencies, go on to next.
5599
if (dap != LIST_FIRST(diraddhdp))
5601
if (dap->da_state & MKDIR_PARENT)
5602
panic("flush_pagedep_deps: MKDIR_PARENT");
5605
* A newly allocated directory must have its "." and
5606
* ".." entries written out before its name can be
5607
* committed in its parent. We do not want or need
5608
* the full semantics of a synchronous ffs_syncvnode as
5609
* that may end up here again, once for each directory
5610
* level in the filesystem. Instead, we push the blocks
5611
* and wait for them to clear. We have to fsync twice
5612
* because the first call may choose to defer blocks
5613
* that still have dependencies, but deferral will
5614
* happen at most once.
5616
inum = dap->da_newinum;
5617
if (dap->da_state & MKDIR_BODY) {
5619
if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
5622
if ((error=ffs_syncvnode(vp, MNT_NOWAIT)) ||
5623
(error=ffs_syncvnode(vp, MNT_NOWAIT))) {
5630
* If first block is still dirty with a D_MKDIR
5631
* dependency then it needs to be written now.
5635
bp = gbincore(&vp->v_bufobj, 0);
5637
break; /* First block not present */
5638
error = BUF_LOCK(bp,
5644
if (error == ENOLCK)
5645
continue; /* Slept, retry */
5648
if ((bp->b_flags & B_DELWRI) == 0) {
5650
break; /* Buffer not dirty */
5652
for (wk = LIST_FIRST(&bp->b_dep);
5654
wk = LIST_NEXT(wk, wk_list))
5655
if (wk->wk_type == D_MKDIR)
5658
BUF_UNLOCK(bp); /* Dependency gone */
5661
* D_MKDIR dependency remains,
5662
* must write buffer to stable
5675
break; /* Flushing of first block failed */
5678
* If that cleared dependencies, go on to next.
5680
if (dap != LIST_FIRST(diraddhdp))
5682
if (dap->da_state & MKDIR_BODY)
5683
panic("flush_pagedep_deps: MKDIR_BODY");
5686
* Flush the inode on which the directory entry depends.
5687
* Having accounted for MKDIR_PARENT and MKDIR_BODY above,
5688
* the only remaining dependency is that the updated inode
5689
* count must get pushed to disk. The inode has already
5690
* been pushed into its inode buffer (via VOP_UPDATE) at
5691
* the time of the reference count change. So we need only
5692
* locate that buffer, ensure that there will be no rollback
5693
* caused by a bitmap dependency, then write the inode buffer.
5696
if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0)
5697
panic("flush_pagedep_deps: lost inode");
5699
* If the inode still has bitmap dependencies,
5700
* push them to disk.
5702
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
5703
bp = inodedep->id_buf;
5704
bp = getdirtybuf(bp, &lk, MNT_WAIT);
5708
if ((error = bwrite(bp)) != 0)
5711
if (dap != LIST_FIRST(diraddhdp))
5715
* If the inode is still sitting in a buffer waiting
5716
* to be written, push it to disk.
5719
if ((error = bread(ump->um_devvp,
5720
fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)),
5721
(int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0) {
5725
if ((error = bwrite(bp)) != 0)
5729
* If we have failed to get rid of all the dependencies
5730
* then something is seriously wrong.
5732
if (dap == LIST_FIRST(diraddhdp))
5733
panic("flush_pagedep_deps: flush failed");
5741
* A large burst of file addition or deletion activity can drive the
5742
* memory load excessively high. First attempt to slow things down
5743
* using the techniques below. If that fails, this routine requests
5744
* the offending operations to fall back to running synchronously
5745
* until the memory load returns to a reasonable level.
5748
softdep_slowdown(vp)
5751
int max_softdeps_hard;
5754
max_softdeps_hard = max_softdeps * 11 / 10;
5755
if (num_dirrem < max_softdeps_hard / 2 &&
5756
num_inodedep < max_softdeps_hard &&
5757
VFSTOUFS(vp->v_mount)->um_numindirdeps < maxindirdeps &&
5758
num_freeblkdep < max_softdeps_hard) {
5762
if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps)
5764
stat_sync_limit_hit += 1;
5770
* Called by the allocation routines when they are about to fail
5771
* in the hope that we can free up some disk space.
5773
* First check to see if the work list has anything on it. If it has,
5774
* clean up entries until we successfully free some space. Because this
5775
* process holds inodes locked, we cannot handle any remove requests
5776
* that might block on a locked inode as that could lead to deadlock.
5777
* If the worklist yields no free space, encourage the syncer daemon
5778
* to help us. In no event will we try for longer than tickdelay seconds.
5781
softdep_request_cleanup(fs, vp)
5785
struct ufsmount *ump;
5787
ufs2_daddr_t needed;
5790
ump = VTOI(vp)->i_ump;
5791
mtx_assert(UFS_MTX(ump), MA_OWNED);
5792
needed = fs->fs_cstotal.cs_nbfree + fs->fs_contigsumsize;
5793
starttime = time_second + tickdelay;
5795
* If we are being called because of a process doing a
5796
* copy-on-write, then it is not safe to update the vnode
5797
* as we may recurse into the copy-on-write routine.
5799
if (!(curthread->td_pflags & TDP_COWINPROGRESS)) {
5801
error = ffs_update(vp, 1);
5806
while (fs->fs_pendingblocks > 0 && fs->fs_cstotal.cs_nbfree <= needed) {
5807
if (time_second > starttime)
5811
if (ump->softdep_on_worklist > 0 &&
5812
process_worklist_item(UFSTOVFS(ump), LK_NOWAIT) != -1) {
5813
stat_worklist_push += 1;
5818
request_cleanup(UFSTOVFS(ump), FLUSH_REMOVE_WAIT);
5826
* If memory utilization has gotten too high, deliberately slow things
5827
* down and speed up the I/O processing.
5829
extern struct thread *syncertd;
5831
request_cleanup(mp, resource)
5835
struct thread *td = curthread;
5836
struct ufsmount *ump;
5838
mtx_assert(&lk, MA_OWNED);
5840
* We never hold up the filesystem syncer or buf daemon.
5842
if (td->td_pflags & (TDP_SOFTDEP|TDP_NORUNNINGBUF))
5846
* First check to see if the work list has gotten backlogged.
5847
* If it has, co-opt this process to help clean up two entries.
5848
* Because this process may hold inodes locked, we cannot
5849
* handle any remove requests that might block on a locked
5850
* inode as that could lead to deadlock. We set TDP_SOFTDEP
5851
* to avoid recursively processing the worklist.
5853
if (ump->softdep_on_worklist > max_softdeps / 10) {
5854
td->td_pflags |= TDP_SOFTDEP;
5855
process_worklist_item(mp, LK_NOWAIT);
5856
process_worklist_item(mp, LK_NOWAIT);
5857
td->td_pflags &= ~TDP_SOFTDEP;
5858
stat_worklist_push += 2;
5862
* Next, we attempt to speed up the syncer process. If that
5863
* is successful, then we allow the process to continue.
5865
if (softdep_speedup() && resource != FLUSH_REMOVE_WAIT)
5868
* If we are resource constrained on inode dependencies, try
5869
* flushing some dirty inodes. Otherwise, we are constrained
5870
* by file deletions, so try accelerating flushes of directories
5871
* with removal dependencies. We would like to do the cleanup
5872
* here, but we probably hold an inode locked at this point and
5873
* that might deadlock against one that we try to clean. So,
5874
* the best that we can do is request the syncer daemon to do
5875
* the cleanup for us.
5880
stat_ino_limit_push += 1;
5881
req_clear_inodedeps += 1;
5882
stat_countp = &stat_ino_limit_hit;
5886
case FLUSH_REMOVE_WAIT:
5887
stat_blk_limit_push += 1;
5888
req_clear_remove += 1;
5889
stat_countp = &stat_blk_limit_hit;
5893
panic("request_cleanup: unknown type");
5896
* Hopefully the syncer daemon will catch up and awaken us.
5897
* We wait at most tickdelay before proceeding in any case.
5900
if (callout_pending(&softdep_callout) == FALSE)
5901
callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
5904
msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0);
5910
* Awaken processes pausing in request_cleanup and clear proc_waiting
5911
* to indicate that there is no longer a timer running.
5919
* The callout_ API has acquired mtx and will hold it around this
5923
wakeup_one(&proc_waiting);
5924
if (proc_waiting > 0)
5925
callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
5930
* Flush out a directory with at least one removal dependency in an effort to
5931
* reduce the number of dirrem, freefile, and freeblks dependency structures.
5937
struct pagedep_hashhead *pagedephd;
5938
struct pagedep *pagedep;
5939
static int next = 0;
5945
mtx_assert(&lk, MA_OWNED);
5947
for (cnt = 0; cnt < pagedep_hash; cnt++) {
5948
pagedephd = &pagedep_hashtbl[next++];
5949
if (next >= pagedep_hash)
5951
LIST_FOREACH(pagedep, pagedephd, pd_hash) {
5952
if (LIST_EMPTY(&pagedep->pd_dirremhd))
5954
mp = pagedep->pd_list.wk_mp;
5955
ino = pagedep->pd_ino;
5956
if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
5959
if ((error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp,
5960
FFSV_FORCEINSMQ))) {
5961
softdep_error("clear_remove: vget", error);
5962
vn_finished_write(mp);
5966
if ((error = ffs_syncvnode(vp, MNT_NOWAIT)))
5967
softdep_error("clear_remove: fsync", error);
5972
vn_finished_write(mp);
5980
* Clear out a block of dirty inodes in an effort to reduce
5981
* the number of inodedep dependency structures.
5987
struct inodedep_hashhead *inodedephd;
5988
struct inodedep *inodedep;
5989
static int next = 0;
5994
ino_t firstino, lastino, ino;
5996
mtx_assert(&lk, MA_OWNED);
5998
* Pick a random inode dependency to be cleared.
5999
* We will then gather up all the inodes in its block
6000
* that have dependencies and flush them out.
6002
for (cnt = 0; cnt < inodedep_hash; cnt++) {
6003
inodedephd = &inodedep_hashtbl[next++];
6004
if (next >= inodedep_hash)
6006
if ((inodedep = LIST_FIRST(inodedephd)) != NULL)
6009
if (inodedep == NULL)
6011
fs = inodedep->id_fs;
6012
mp = inodedep->id_list.wk_mp;
6014
* Find the last inode in the block with dependencies.
6016
firstino = inodedep->id_ino & ~(INOPB(fs) - 1);
6017
for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--)
6018
if (inodedep_lookup(mp, lastino, 0, &inodedep) != 0)
6021
* Asynchronously push all but the last inode with dependencies.
6022
* Synchronously push the last inode with dependencies to ensure
6023
* that the inode block gets written to free up the inodedeps.
6025
for (ino = firstino; ino <= lastino; ino++) {
6026
if (inodedep_lookup(mp, ino, 0, &inodedep) == 0)
6028
if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
6031
if ((error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp,
6032
FFSV_FORCEINSMQ)) != 0) {
6033
softdep_error("clear_inodedeps: vget", error);
6034
vn_finished_write(mp);
6038
if (ino == lastino) {
6039
if ((error = ffs_syncvnode(vp, MNT_WAIT)))
6040
softdep_error("clear_inodedeps: fsync1", error);
6042
if ((error = ffs_syncvnode(vp, MNT_NOWAIT)))
6043
softdep_error("clear_inodedeps: fsync2", error);
6049
vn_finished_write(mp);
6055
* Function to determine if the buffer has outstanding dependencies
6056
* that will cause a roll-back if the buffer is written. If wantcount
6057
* is set, return number of dependencies, otherwise just yes or no.
6060
softdep_count_dependencies(bp, wantcount)
6064
struct worklist *wk;
6065
struct inodedep *inodedep;
6066
struct indirdep *indirdep;
6067
struct allocindir *aip;
6068
struct pagedep *pagedep;
6074
LIST_FOREACH(wk, &bp->b_dep, wk_list) {
6075
switch (wk->wk_type) {
6078
inodedep = WK_INODEDEP(wk);
6079
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
6080
/* bitmap allocation dependency */
6085
if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
6086
/* direct block pointer dependency */
6091
if (TAILQ_FIRST(&inodedep->id_extupdt)) {
6092
/* direct block pointer dependency */
6100
indirdep = WK_INDIRDEP(wk);
6102
LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) {
6103
/* indirect block pointer dependency */
6111
pagedep = WK_PAGEDEP(wk);
6112
for (i = 0; i < DAHASHSZ; i++) {
6114
LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
6115
/* directory entry dependency */
6127
/* never a dependency on these blocks */
6131
panic("softdep_check_for_rollback: Unexpected type %s",
6132
TYPENAME(wk->wk_type));
6142
* Acquire exclusive access to a buffer.
6143
* Must be called with a locked mtx parameter.
6144
* Return acquired buffer or NULL on failure.
6147
getdirtybuf(bp, mtx, waitfor)
6154
mtx_assert(mtx, MA_OWNED);
6155
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) {
6156
if (waitfor != MNT_WAIT)
6158
error = BUF_LOCK(bp,
6159
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, mtx);
6161
* Even if we sucessfully acquire bp here, we have dropped
6162
* mtx, which may violates our guarantee.
6166
else if (error != ENOLCK)
6167
panic("getdirtybuf: inconsistent lock: %d", error);
6171
if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
6172
if (mtx == &lk && waitfor == MNT_WAIT) {
6174
BO_LOCK(bp->b_bufobj);
6176
if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
6177
bp->b_vflags |= BV_BKGRDWAIT;
6178
msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj),
6179
PRIBIO | PDROP, "getbuf", 0);
6181
BO_UNLOCK(bp->b_bufobj);
6186
if (waitfor != MNT_WAIT)
6189
* The mtx argument must be bp->b_vp's mutex in
6192
#ifdef DEBUG_VFS_LOCKS
6193
if (bp->b_vp->v_type != VCHR)
6194
ASSERT_VI_LOCKED(bp->b_vp, "getdirtybuf");
6196
bp->b_vflags |= BV_BKGRDWAIT;
6197
msleep(&bp->b_xflags, mtx, PRIBIO, "getbuf", 0);
6200
if ((bp->b_flags & B_DELWRI) == 0) {
6210
* Check if it is safe to suspend the file system now. On entry,
6211
* the vnode interlock for devvp should be held. Return 0 with
6212
* the mount interlock held if the file system can be suspended now,
6213
* otherwise return EAGAIN with the mount interlock held.
6216
softdep_check_suspend(struct mount *mp,
6217
struct vnode *devvp,
6219
int softdep_accdeps,
6220
int secondary_writes,
6221
int secondary_accwrites)
6224
struct ufsmount *ump;
6227
ASSERT_VI_LOCKED(devvp, "softdep_check_suspend");
6229
bo = &devvp->v_bufobj;
6232
if (!TRY_ACQUIRE_LOCK(&lk)) {
6239
if (!MNT_ITRYLOCK(mp)) {
6247
if (mp->mnt_secondary_writes != 0) {
6250
msleep(&mp->mnt_secondary_writes,
6252
(PUSER - 1) | PDROP, "secwr", 0);
6260
* Reasons for needing more work before suspend:
6261
* - Dirty buffers on devvp.
6262
* - Softdep activity occurred after start of vnode sync loop
6263
* - Secondary writes occurred after start of vnode sync loop
6266
if (bo->bo_numoutput > 0 ||
6267
bo->bo_dirty.bv_cnt > 0 ||
6268
softdep_deps != 0 ||
6269
ump->softdep_deps != 0 ||
6270
softdep_accdeps != ump->softdep_accdeps ||
6271
secondary_writes != 0 ||
6272
mp->mnt_secondary_writes != 0 ||
6273
secondary_accwrites != mp->mnt_secondary_accwrites)
6282
* Get the number of dependency structures for the file system, both
6283
* the current number and the total number allocated. These will
6284
* later be used to detect that softdep processing has occurred.
6287
softdep_get_depcounts(struct mount *mp,
6289
int *softdep_accdepsp)
6291
struct ufsmount *ump;
6295
*softdep_depsp = ump->softdep_deps;
6296
*softdep_accdepsp = ump->softdep_accdeps;
6301
* Wait for pending output on a vnode to complete.
6302
* Must be called with vnode lock and interlock locked.
6304
* XXX: Should just be a call to bufobj_wwait().
6310
ASSERT_VOP_LOCKED(vp, "drain_output");
6311
ASSERT_VI_LOCKED(vp, "drain_output");
6313
while (vp->v_bufobj.bo_numoutput) {
6314
vp->v_bufobj.bo_flag |= BO_WWAIT;
6315
msleep((caddr_t)&vp->v_bufobj.bo_numoutput,
6316
VI_MTX(vp), PRIBIO + 1, "drainvp", 0);
6321
* Called whenever a buffer that is being invalidated or reallocated
6322
* contains dependencies. This should only happen if an I/O error has
6323
* occurred. The routine is called with the buffer locked.
6326
softdep_deallocate_dependencies(bp)
6330
if ((bp->b_ioflags & BIO_ERROR) == 0)
6331
panic("softdep_deallocate_dependencies: dangling deps");
6332
softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
6333
panic("softdep_deallocate_dependencies: unrecovered I/O error");
6337
/*
 * Function to handle asynchronous write errors in the filesystem.
 *
 * NOTE(review): reconstructed from a corrupted extraction; verify against
 * upstream ffs_softdep.c.
 */
static void
softdep_error(func, error)
	char *func;
	int error;
{

	/* XXX should do something better! */
	printf("%s: got error %d while accessing filesystem\n", func, error);
}
#ifdef DDB

/*
 * DDB command: dump every inodedep hash chain; with an address argument,
 * restrict the dump to inodedeps belonging to that struct fs.
 *
 * NOTE(review): the #ifdef DDB guard and loop braces were lost in a
 * corrupted extraction; reconstructed -- verify against upstream
 * ffs_softdep.c.
 */
DB_SHOW_COMMAND(inodedeps, db_show_inodedeps)
{
	struct inodedep_hashhead *inodedephd;
	struct inodedep *inodedep;
	struct fs *fs;
	int cnt;

	fs = have_addr ? (struct fs *)addr : NULL;
	for (cnt = 0; cnt < inodedep_hash; cnt++) {
		inodedephd = &inodedep_hashtbl[cnt];
		LIST_FOREACH(inodedep, inodedephd, id_hash) {
			if (fs != NULL && fs != inodedep->id_fs)
				continue;
			db_printf("%p fs %p st %x ino %jd inoblk %jd\n",
			    inodedep, inodedep->id_fs, inodedep->id_state,
			    (intmax_t)inodedep->id_ino,
			    (intmax_t)fsbtodb(inodedep->id_fs,
			    ino_to_fsba(inodedep->id_fs, inodedep->id_ino)));
		}
	}
}

#endif /* DDB */
#endif /* SOFTUPDATES */