[Zrouter-src-freebsd] ZRouter.org: push to FreeBSD HEAD tree

zrouter-src-freebsd at zrouter.org zrouter-src-freebsd at zrouter.org
Fri Mar 2 15:39:22 UTC 2012


details:   http://zrouter.org/hg/FreeBSD/head//rev/37083b471c7e
changeset: 384:37083b471c7e
user:      ray at terran.dlink.ua
date:      Fri Mar 02 17:10:47 2012 +0200
description:
Update to FreeBSD-HEAD @232391

diffstat:

 head/sys/ufs/ffs/ffs_extern.h   |    9 ++-
 head/sys/ufs/ffs/ffs_inode.c    |   51 +++++++++++++----
 head/sys/ufs/ffs/ffs_snapshot.c |   58 +++++++++++---------
 head/sys/ufs/ffs/ffs_softdep.c  |  113 ++++++++++++++++++++++++++-------------
 head/sys/ufs/ffs/ffs_vfsops.c   |   22 +++++--
 head/sys/ufs/ffs/ffs_vnops.c    |   58 ++++++++++++++------
 head/sys/ufs/ufs/inode.h        |    4 +-
 head/sys/ufs/ufs/ufs_acl.c      |    8 +-
 head/sys/ufs/ufs/ufs_lookup.c   |    5 +-
 head/sys/ufs/ufs/ufs_quota.c    |    6 +-
 head/sys/ufs/ufs/ufs_vnops.c    |   16 ++++-
 11 files changed, 236 insertions(+), 114 deletions(-)

diffs (962 lines):

diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_extern.h
--- a/head/sys/ufs/ffs/ffs_extern.h	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_extern.h	Fri Mar 02 17:10:47 2012 +0200
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)ffs_extern.h	8.6 (Berkeley) 3/30/95
- * $FreeBSD: head/sys/ufs/ffs/ffs_extern.h 225166 2011-08-25 08:17:39Z mm $
+ * $FreeBSD: head/sys/ufs/ffs/ffs_extern.h 232351 2012-03-01 18:45:25Z mckusick $
  */
 
 #ifndef _UFS_FFS_EXTERN_H
@@ -167,6 +167,13 @@
 #define FLUSH_INODES_WAIT	2
 #define FLUSH_BLOCKS		3
 #define FLUSH_BLOCKS_WAIT	4
+/*
+ * Flag to ffs_syncvnode() to request flushing of data only,
+ * but skip the ffs_update() on the inode itself. Used to avoid
+ * deadlock when flushing snapshot inodes while holding snaplk.
+ * Avoid bit conflicts with MNT_WAIT values in sys/mount.h
+ */
+#define	NO_INO_UPDT		0x10
 
 int	ffs_rdonly(struct inode *);
 
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_inode.c
--- a/head/sys/ufs/ffs/ffs_inode.c	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_inode.c	Fri Mar 02 17:10:47 2012 +0200
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_inode.c 225166 2011-08-25 08:17:39Z mm $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_inode.c 232351 2012-03-01 18:45:25Z mckusick $");
 
 #include "opt_quota.h"
 
@@ -81,7 +81,7 @@
 	struct fs *fs;
 	struct buf *bp;
 	struct inode *ip;
-	int error;
+	int flags, error;
 
 	ASSERT_VOP_ELOCKED(vp, "ffs_update");
 	ufs_itimes(vp);
@@ -92,11 +92,36 @@
 	fs = ip->i_fs;
 	if (fs->fs_ronly && ip->i_ump->um_fsckpid == 0)
 		return (0);
-	error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
-		(int)fs->fs_bsize, NOCRED, &bp);
-	if (error) {
-		brelse(bp);
-		return (error);
+	/*
+	 * If we are updating a snapshot and another process is currently
+	 * writing the buffer containing the inode for this snapshot then
+	 * a deadlock can occur when it tries to check the snapshot to see
+	 * if that block needs to be copied. Thus when updating a snapshot
+	 * we check to see if the buffer is already locked, and if it is
+	 * we drop the snapshot lock until the buffer has been written
+	 * and is available to us. We have to grab a reference to the
+	 * snapshot vnode to prevent it from being removed while we are
+	 * waiting for the buffer.
+	 */
+	flags = 0;
+	if (IS_SNAPSHOT(ip))
+		flags = GB_LOCK_NOWAIT;
+	error = breadn_flags(ip->i_devvp,
+	     fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+	     (int) fs->fs_bsize, 0, 0, 0, NOCRED, flags, &bp);
+	if (error != 0) {
+		if (error != EBUSY) {
+			brelse(bp);
+			return (error);
+		}
+		KASSERT((IS_SNAPSHOT(ip)), ("EBUSY from non-snapshot"));
+		vref(vp);	/* Protect against ffs_snapgone() */
+		VOP_UNLOCK(vp, 0);
+		(void) bread(ip->i_devvp,
+		     fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+		     (int) fs->fs_bsize, NOCRED, &bp);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+		vrele(vp);
 	}
 	if (DOINGSOFTDEP(vp))
 		softdep_update_inodeblock(ip, bp, waitfor);
@@ -108,16 +133,16 @@
 	else
 		*((struct ufs2_dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
-	if (waitfor && !DOINGASYNC(vp)) {
-		return (bwrite(bp));
-	} else if (vm_page_count_severe() || buf_dirty_count_severe()) {
-		return (bwrite(bp));
+	if ((waitfor && !DOINGASYNC(vp)) ||
+	    (vm_page_count_severe() || buf_dirty_count_severe())) {
+		error = bwrite(bp);
 	} else {
 		if (bp->b_bufsize == fs->fs_bsize)
 			bp->b_flags |= B_CLUSTEROK;
 		bdwrite(bp);
-		return (0);
+		error = 0;
 	}
+	return (error);
 }
 
 #define	SINGLE	0	/* index of single indirect block */
@@ -253,7 +278,7 @@
 	}
 	if (fs->fs_ronly)
 		panic("ffs_truncate: read-only filesystem");
-	if ((ip->i_flags & SF_SNAPSHOT) != 0)
+	if (IS_SNAPSHOT(ip))
 		ffs_snapremove(vp);
 	vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
 	osize = ip->i_size;
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_snapshot.c
--- a/head/sys/ufs/ffs/ffs_snapshot.c	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_snapshot.c	Fri Mar 02 17:10:47 2012 +0200
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_snapshot.c 230250 2012-01-17 01:14:56Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_snapshot.c 232351 2012-03-01 18:45:25Z mckusick $");
 
 #include "opt_quota.h"
 
@@ -203,6 +203,7 @@
 	ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
 	int error, cg, snaploc;
 	int i, size, len, loc;
+	ufs2_daddr_t blockno;
 	uint64_t flag;
 	struct timespec starttime = {0, 0}, endtime;
 	char saved_nice = 0;
@@ -529,7 +530,7 @@
 		    (xvp->v_usecount == 0 &&
 		     (xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) ||
 		    xvp->v_type == VNON ||
-		    (VTOI(xvp)->i_flags & SF_SNAPSHOT)) {
+		    IS_SNAPSHOT(VTOI(xvp))) {
 			VI_UNLOCK(xvp);
 			MNT_ILOCK(mp);
 			continue;
@@ -815,21 +816,26 @@
 	if (space != NULL)
 		free(space, M_UFSMNT);
 	/*
-	 * If another process is currently writing the buffer containing
-	 * the inode for this snapshot then a deadlock can occur. Drop
-	 * the snapshot lock until the buffer has been written.
+	 * Preallocate all the direct blocks in the snapshot inode so
+	 * that we never have to write the inode itself to commit an
+	 * update to the contents of the snapshot. Note that once
+	 * created, the size of the snapshot will never change, so
+	 * there will never be a need to write the inode except to
+	 * update the non-integrity-critical time fields and
+	 * allocated-block count.
 	 */
-	VREF(vp);	/* Protect against ffs_snapgone() */
-	VOP_UNLOCK(vp, 0);
-	(void) bread(ip->i_devvp,
-		     fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
-		     (int) fs->fs_bsize, NOCRED, &nbp);
-	brelse(nbp);
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-	if (ip->i_effnlink == 0)
-		error = ENOENT;		/* Snapshot file unlinked */
-	else
-		vrele(vp);		/* Drop extra reference */
+	for (blockno = 0; blockno < NDADDR; blockno++) {
+		if (DIP(ip, i_db[blockno]) != 0)
+			continue;
+		error = UFS_BALLOC(vp, lblktosize(fs, blockno),
+		    fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
+		if (error)
+			break;
+		error = readblock(vp, bp, blockno);
+		bawrite(bp);
+		if (error != 0)
+			break;
+	}
 done:
 	free(copy_fs->fs_csp, M_UFSMNT);
 	free(copy_fs, M_UFSMNT);
@@ -1902,7 +1908,7 @@
 			bawrite(cbp);
 			if ((vtype == VDIR || dopersistence) &&
 			    ip->i_effnlink > 0)
-				(void) ffs_syncvnode(vp, MNT_WAIT);
+				(void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
 			continue;
 		}
 		/*
@@ -1913,7 +1919,7 @@
 			bawrite(cbp);
 			if ((vtype == VDIR || dopersistence) &&
 			    ip->i_effnlink > 0)
-				(void) ffs_syncvnode(vp, MNT_WAIT);
+				(void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
 			break;
 		}
 		savedcbp = cbp;
@@ -1931,7 +1937,7 @@
 		bawrite(savedcbp);
 		if ((vtype == VDIR || dopersistence) &&
 		    VTOI(vp)->i_effnlink > 0)
-			(void) ffs_syncvnode(vp, MNT_WAIT);
+			(void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
 	}
 	/*
 	 * If we have been unable to allocate a block in which to do
@@ -1987,9 +1993,9 @@
 			continue;
 		}
 		ip = VTOI(vp);
-		if ((ip->i_flags & SF_SNAPSHOT) == 0 || ip->i_size ==
+		if (!IS_SNAPSHOT(ip) || ip->i_size ==
 		    lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) {
-			if ((ip->i_flags & SF_SNAPSHOT) == 0) {
+			if (!IS_SNAPSHOT(ip)) {
 				reason = "non-snapshot";
 			} else {
 				reason = "old format snapshot";
@@ -2250,7 +2256,7 @@
 	int launched_async_io, prev_norunningbuf;
 	long saved_runningbufspace;
 
-	if (devvp != bp->b_vp && (VTOI(bp->b_vp)->i_flags & SF_SNAPSHOT) != 0)
+	if (devvp != bp->b_vp && IS_SNAPSHOT(VTOI(bp->b_vp)))
 		return (0);		/* Update on a snapshot file */
 	if (td->td_pflags & TDP_COWINPROGRESS)
 		panic("ffs_copyonwrite: recursive call");
@@ -2395,7 +2401,7 @@
 			bawrite(cbp);
 			if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
 			    dopersistence) && ip->i_effnlink > 0)
-				(void) ffs_syncvnode(vp, MNT_WAIT);
+				(void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
 			else
 				launched_async_io = 1;
 			continue;
@@ -2408,7 +2414,7 @@
 			bawrite(cbp);
 			if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
 			    dopersistence) && ip->i_effnlink > 0)
-				(void) ffs_syncvnode(vp, MNT_WAIT);
+				(void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
 			else
 				launched_async_io = 1;
 			break;
@@ -2428,7 +2434,7 @@
 		bawrite(savedcbp);
 		if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
 		    dopersistence) && VTOI(vp)->i_effnlink > 0)
-			(void) ffs_syncvnode(vp, MNT_WAIT);
+			(void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
 		else
 			launched_async_io = 1;
 	}
@@ -2478,7 +2484,7 @@
 	}
 	TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
 		vp = ITOV(ip);
-		ffs_syncvnode(vp, waitfor);
+		ffs_syncvnode(vp, waitfor|NO_INO_UPDT);
 	}
 	lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
 }
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_softdep.c
--- a/head/sys/ufs/ffs/ffs_softdep.c	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_softdep.c	Fri Mar 02 17:10:47 2012 +0200
@@ -40,9 +40,10 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_softdep.c 227309 2011-11-07 15:43:11Z ed $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_softdep.c 232351 2012-03-01 18:45:25Z mckusick $");
 
 #include "opt_ffs.h"
+#include "opt_quota.h"
 #include "opt_ddb.h"
 
 /*
@@ -2826,7 +2827,12 @@
 {
 	struct ufsmount *ump;
 
-	if (DOINGSUJ(vp) == 0)
+	/*
+	 * Nothing to do if we are not running journaled soft updates.
+	 * If we currently hold the snapshot lock, we must avoid handling
+	 * other resources that could cause deadlock.
+	 */
+	if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)))
 		return (0);
 	ump = VFSTOUFS(vp->v_mount);
 	ACQUIRE_LOCK(&lk);
@@ -2872,7 +2878,12 @@
 
 	ump = VFSTOUFS(dvp->v_mount);
 	mtx_assert(&lk, MA_OWNED);
-	if (journal_space(ump, 0))
+	/*
+	 * Nothing to do if we have sufficient journal space.
+	 * If we currently hold the snapshot lock, we must avoid
+	 * handling other resources that could cause deadlock.
+	 */
+	if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp))))
 		return;
 	stat_journal_low++;
 	FREE_LOCK(&lk);
@@ -4303,11 +4314,15 @@
 	struct inode *ip;
 {
 	struct inodedep *inodedep;
+	int dflags;
 
 	KASSERT(ip->i_nlink >= ip->i_effnlink,
 	    ("inodedep_lookup_ip: bad delta"));
-	(void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number,
-	    DEPALLOC, &inodedep);
+	dflags = DEPALLOC;
+	if (IS_SNAPSHOT(ip))
+		dflags |= NODELAY;
+	(void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags,
+	    &inodedep);
 	inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
 
 	return (inodedep);
@@ -4695,7 +4710,7 @@
 	 * the cylinder group map from which it was allocated.
 	 */
 	ACQUIRE_LOCK(&lk);
-	if ((inodedep_lookup(mp, newinum, DEPALLOC|NODELAY, &inodedep)))
+	if ((inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep)))
 		panic("softdep_setup_inomapdep: dependency %p for new"
 		    "inode already exists", inodedep);
 	bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, newinum));
@@ -5436,6 +5451,7 @@
 	struct allocindir *aip;
 	struct pagedep *pagedep;
 	struct mount *mp;
+	int dflags;
 
 	if (lbn != nbp->b_lblkno)
 		panic("softdep_setup_allocindir_page: lbn %jd != lblkno %jd",
@@ -5443,7 +5459,10 @@
 	ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page");
 	mp = UFSTOVFS(ip->i_ump);
 	aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn);
-	(void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+	dflags = DEPALLOC;
+	if (IS_SNAPSHOT(ip))
+		dflags |= NODELAY;
+	(void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
 	/*
 	 * If we are allocating a directory page, then we must
 	 * allocate an associated pagedep to track additions and
@@ -5473,11 +5492,15 @@
 	struct inodedep *inodedep;
 	struct allocindir *aip;
 	ufs_lbn_t lbn;
+	int dflags;
 
 	lbn = nbp->b_lblkno;
 	ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta");
 	aip = newallocindir(ip, ptrno, newblkno, 0, lbn);
-	inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep);
+	dflags = DEPALLOC;
+	if (IS_SNAPSHOT(ip))
+		dflags |= NODELAY;
+	inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags, &inodedep);
 	WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list);
 	if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn))
 		panic("softdep_setup_allocindir_meta: Block already existed");
@@ -6084,11 +6107,7 @@
 	struct mount *mp;
 	ufs2_daddr_t extblocks, datablocks;
 	ufs_lbn_t tmpval, lbn, lastlbn;
-	int frags;
-	int lastoff, iboff;
-	int allocblock;
-	int error, i;
-	int needj;
+	int frags, lastoff, iboff, allocblock, needj, dflags, error, i;
 
 	fs = ip->i_fs;
 	mp = UFSTOVFS(ip->i_ump);
@@ -6106,7 +6125,10 @@
 	 * we don't need to journal the block frees.  The canceled journals
 	 * for the allocations will suffice.
 	 */
-	inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+	dflags = DEPALLOC;
+	if (IS_SNAPSHOT(ip))
+		dflags |= NODELAY;
+	inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
 	if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED &&
 	    length == 0)
 		needj = 0;
@@ -6231,7 +6253,7 @@
 		*((struct ufs2_dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
 	ACQUIRE_LOCK(&lk);
-	(void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+	(void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
 	if ((inodedep->id_state & IOSTARTED) != 0)
 		panic("softdep_setup_freeblocks: inode busy");
 	/*
@@ -6309,7 +6331,7 @@
 
 	}
 	ACQUIRE_LOCK(&lk);
-	inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+	inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
 	TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next);
 	freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST;
 	/*
@@ -6397,7 +6419,7 @@
 	struct fs *fs;
 	ufs2_daddr_t extblocks, datablocks;
 	struct mount *mp;
-	int i, delay, error;
+	int i, delay, error, dflags;
 	ufs_lbn_t tmpval;
 	ufs_lbn_t lbn;
 
@@ -6428,7 +6450,7 @@
 	}
 #ifdef QUOTA
 	/* Reference the quotas in case the block count is wrong in the end. */
-	quotaref(vp, freeblks->fb_quota);
+	quotaref(ITOV(ip), freeblks->fb_quota);
 	(void) chkdq(ip, -datablocks, NOCRED, 0);
 #endif
 	freeblks->fb_chkcnt = -datablocks;
@@ -6462,7 +6484,10 @@
 	 * Find and eliminate any inode dependencies.
 	 */
 	ACQUIRE_LOCK(&lk);
-	(void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+	dflags = DEPALLOC;
+	if (IS_SNAPSHOT(ip))
+		dflags |= NODELAY;
+	(void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
 	if ((inodedep->id_state & IOSTARTED) != 0)
 		panic("softdep_setup_freeblocks: inode busy");
 	/*
@@ -7160,13 +7185,16 @@
 	mtx_assert(&lk, MA_OWNED);
 
 	if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) != 0 ||
+	    !LIST_EMPTY(&inodedep->id_dirremhd) ||
 	    !LIST_EMPTY(&inodedep->id_pendinghd) ||
 	    !LIST_EMPTY(&inodedep->id_bufwait) ||
 	    !LIST_EMPTY(&inodedep->id_inowait) ||
+	    !TAILQ_EMPTY(&inodedep->id_inoreflst) ||
 	    !TAILQ_EMPTY(&inodedep->id_inoupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_newinoupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_extupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_newextupdt) ||
+	    !TAILQ_EMPTY(&inodedep->id_freeblklst) ||
 	    inodedep->id_mkdiradd != NULL || 
 	    inodedep->id_nlinkdelta != 0)
 		return (0);
@@ -8025,7 +8053,7 @@
 	dap->da_pagedep = pagedep;
 	LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
 	    da_pdlist);
-	inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
+	inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep);
 	/*
 	 * If we're journaling, link the diradd into the jaddref so it
 	 * may be completed after the journal entry is written.  Otherwise,
@@ -8627,8 +8655,7 @@
 	 * the number of freefile and freeblks structures.
 	 */
 	ACQUIRE_LOCK(&lk);
-	if (!(ip->i_flags & SF_SNAPSHOT) &&
-	    dep_current[D_DIRREM] > max_softdeps / 2)
+	if (!IS_SNAPSHOT(ip) && dep_current[D_DIRREM] > max_softdeps / 2)
 		(void) request_cleanup(ITOV(dp)->v_mount, FLUSH_BLOCKS);
 	FREE_LOCK(&lk);
 	dirrem = malloc(sizeof(struct dirrem),
@@ -8862,11 +8889,11 @@
 	/*
 	 * Lookup the jaddref for this journal entry.  We must finish
 	 * initializing it and make the diradd write dependent on it.
-	 * If we're not journaling Put it on the id_bufwait list if the inode
-	 * is not yet written. If it is written, do the post-inode write
-	 * processing to put it on the id_pendinghd list.
-	 */
-	inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
+	 * If we're not journaling, put it on the id_bufwait list if the
+	 * inode is not yet written. If it is written, do the post-inode
+	 * write processing to put it on the id_pendinghd list.
+	 */
+	inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep);
 	if (MOUNTEDSUJ(mp)) {
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
@@ -8908,9 +8935,13 @@
 	struct inode *ip;	/* the inode with the increased link count */
 {
 	struct inodedep *inodedep;
+	int dflags;
 
 	ACQUIRE_LOCK(&lk);
-	inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep);
+	dflags = DEPALLOC;
+	if (IS_SNAPSHOT(ip))
+		dflags |= NODELAY;
+	inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags, &inodedep);
 	if (ip->i_nlink < ip->i_effnlink)
 		panic("softdep_change_linkcnt: bad delta");
 	inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
@@ -12112,6 +12143,7 @@
 		case D_FREEWORK:
 		case D_FREEDEP:
 		case D_JSEGDEP:
+		case D_JNEWBLK:
 			continue;
 
 		default:
@@ -12539,22 +12571,25 @@
 	ufs2_daddr_t needed;
 	int error;
 
-	mp = vp->v_mount;
-	ump = VFSTOUFS(mp);
-	mtx_assert(UFS_MTX(ump), MA_OWNED);
+	/*
+	 * If we are being called because of a process doing a
+	 * copy-on-write, then it is not safe to process any
+	 * worklist items as we will recurse into the copyonwrite
+	 * routine.  This will result in an incoherent snapshot.
+	 * If the vnode that we hold is a snapshot, we must avoid
+	 * handling other resources that could cause deadlock.
+	 */
+	if ((curthread->td_pflags & TDP_COWINPROGRESS) || IS_SNAPSHOT(VTOI(vp)))
+		return (0);
+
 	if (resource == FLUSH_BLOCKS_WAIT)
 		stat_cleanup_blkrequests += 1;
 	else
 		stat_cleanup_inorequests += 1;
 
-	/*
-	 * If we are being called because of a process doing a
-	 * copy-on-write, then it is not safe to process any
-	 * worklist items as we will recurse into the copyonwrite
-	 * routine.  This will result in an incoherent snapshot.
-	 */
-	if (curthread->td_pflags & TDP_COWINPROGRESS)
-		return (0);
+	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+	mtx_assert(UFS_MTX(ump), MA_OWNED);
 	UFS_UNLOCK(ump);
 	error = ffs_update(vp, 1);
 	if (error != 0) {
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_vfsops.c
--- a/head/sys/ufs/ffs/ffs_vfsops.c	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_vfsops.c	Fri Mar 02 17:10:47 2012 +0200
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 230249 2012-01-17 01:08:01Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 231160 2012-02-07 20:43:28Z mckusick $");
 
 #include "opt_quota.h"
 #include "opt_ufs.h"
@@ -1436,17 +1436,26 @@
 	int softdep_accdeps;
 	struct bufobj *bo;
 
+	wait = 0;
+	suspend = 0;
+	suspended = 0;
 	td = curthread;
 	fs = ump->um_fs;
 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
 		panic("%s: ffs_sync: modification on read-only filesystem",
 		    fs->fs_fsmnt);
 	/*
+	 * For a lazy sync, we just care about the filesystem metadata.
+	 */
+	if (waitfor == MNT_LAZY) {
+		secondary_accwrites = 0;
+		secondary_writes = 0;
+		lockreq = 0;
+		goto metasync;
+	}
+	/*
 	 * Write back each (modified) inode.
 	 */
-	wait = 0;
-	suspend = 0;
-	suspended = 0;
 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
 	if (waitfor == MNT_SUSPEND) {
 		suspend = 1;
@@ -1517,11 +1526,12 @@
 #ifdef QUOTA
 	qsync(mp);
 #endif
+
+metasync:
 	devvp = ump->um_devvp;
 	bo = &devvp->v_bufobj;
 	BO_LOCK(bo);
-	if (waitfor != MNT_LAZY &&
-	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
+	if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
 		BO_UNLOCK(bo);
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_vnops.c
--- a/head/sys/ufs/ffs/ffs_vnops.c	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_vnops.c	Fri Mar 02 17:10:47 2012 +0200
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vnops.c 224503 2011-07-30 00:43:18Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vnops.c 232351 2012-03-01 18:45:25Z mckusick $");
 
 #include <sys/param.h>
 #include <sys/bio.h>
@@ -216,8 +216,10 @@
 	struct buf *bp;
 	struct buf *nbp;
 	ufs_lbn_t lbn;
-	int error, wait, passes;
+	int error, wait, passes, noupdate;
 
+	noupdate = waitfor & NO_INO_UPDT;
+	waitfor &= ~NO_INO_UPDT;
 	ip = VTOI(vp);
 	ip->i_flag &= ~IN_NEEDSYNC;
 	bo = &vp->v_bufobj;
@@ -300,7 +302,10 @@
 	}
 	if (waitfor != MNT_WAIT) {
 		BO_UNLOCK(bo);
-		return (ffs_update(vp, waitfor));
+		if (noupdate)
+			return (0);
+		else
+			return (ffs_update(vp, waitfor));
 	}
 	/* Drain IO to see if we're done. */
 	bufobj_wwait(bo, 0, 0);
@@ -317,7 +322,7 @@
 	 */
 	if (bo->bo_dirty.bv_cnt > 0) {
 		/* Write the inode after sync passes to flush deps. */
-		if (wait && DOINGSOFTDEP(vp)) {
+		if (wait && DOINGSOFTDEP(vp) && noupdate == 0) {
 			BO_UNLOCK(bo);
 			ffs_update(vp, MNT_WAIT);
 			BO_LOCK(bo);
@@ -332,7 +337,9 @@
 #endif
 	}
 	BO_UNLOCK(bo);
-	error = ffs_update(vp, MNT_WAIT);
+	error = 0;
+	if (noupdate == 0)
+		error = ffs_update(vp, MNT_WAIT);
 	if (DOINGSUJ(vp))
 		softdep_journal_fsync(VTOI(vp));
 	return (error);
@@ -420,7 +427,8 @@
 	ufs_lbn_t lbn, nextlbn;
 	off_t bytesinfile;
 	long size, xfersize, blkoffset;
-	int error, orig_resid;
+	ssize_t orig_resid;
+	int error;
 	int seqcount;
 	int ioflag;
 
@@ -633,8 +641,9 @@
 	struct buf *bp;
 	ufs_lbn_t lbn;
 	off_t osize;
+	ssize_t resid;
 	int seqcount;
-	int blkoffset, error, flags, ioflag, resid, size, xfersize;
+	int blkoffset, error, flags, ioflag, size, xfersize;
 
 	vp = ap->a_vp;
 	uio = ap->a_uio;
@@ -718,15 +727,6 @@
 			vnode_pager_setsize(vp, ip->i_size);
 			break;
 		}
-		/*
-		 * If the buffer is not valid we have to clear out any
-		 * garbage data from the pages instantiated for the buffer.
-		 * If we do not, a failed uiomove() during a write can leave
-		 * the prior contents of the pages exposed to a userland
-		 * mmap().  XXX deal with uiomove() errors a better way.
-		 */
-		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
-			vfs_bio_clrbuf(bp);
 		if (ioflag & IO_DIRECT)
 			bp->b_flags |= B_DIRECT;
 		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
@@ -743,6 +743,26 @@
 
 		error =
 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+		/*
+		 * If the buffer is not already filled and we encounter an
+		 * error while trying to fill it, we have to clear out any
+		 * garbage data from the pages instantiated for the buffer.
+		 * If we do not, a failed uiomove() during a write can leave
+		 * the prior contents of the pages exposed to a userland mmap.
+		 *
+		 * Note that we need only clear buffers with a transfer size
+		 * equal to the block size because buffers with a shorter
+		 * transfer size were cleared above by the call to UFS_BALLOC()
+		 * with the BA_CLRBUF flag set.
+		 *
+		 * If the source region for uiomove identically mmaps the
+		 * buffer, uiomove() performed the NOP copy, and the buffer
+		 * content remains valid because the page fault handler
+		 * validated the pages.
+		 */
+		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
+		    fs->fs_bsize == xfersize)
+			vfs_bio_clrbuf(bp);
 		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
 		   (LIST_EMPTY(&bp->b_dep))) {
 			bp->b_flags |= B_RELBUF;
@@ -860,7 +880,8 @@
 	ufs_lbn_t lbn, nextlbn;
 	off_t bytesinfile;
 	long size, xfersize, blkoffset;
-	int error, orig_resid;
+	ssize_t orig_resid;
+	int error;
 
 	ip = VTOI(vp);
 	fs = ip->i_fs;
@@ -1013,7 +1034,8 @@
 	struct buf *bp;
 	ufs_lbn_t lbn;
 	off_t osize;
-	int blkoffset, error, flags, resid, size, xfersize;
+	ssize_t resid;
+	int blkoffset, error, flags, size, xfersize;
 
 	ip = VTOI(vp);
 	fs = ip->i_fs;
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/inode.h
--- a/head/sys/ufs/ufs/inode.h	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/inode.h	Fri Mar 02 17:10:47 2012 +0200
@@ -32,7 +32,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)inode.h	8.9 (Berkeley) 5/14/95
- * $FreeBSD: head/sys/ufs/ufs/inode.h 224503 2011-07-30 00:43:18Z mckusick $
+ * $FreeBSD: head/sys/ufs/ufs/inode.h 232351 2012-03-01 18:45:25Z mckusick $
  */
 
 #ifndef _UFS_UFS_INODE_H_
@@ -158,6 +158,7 @@
 #define	SHORTLINK(ip) \
 	(((ip)->i_ump->um_fstype == UFS1) ? \
 	(caddr_t)(ip)->i_din1->di_db : (caddr_t)(ip)->i_din2->di_db)
+#define IS_SNAPSHOT(ip)		((ip)->i_flags & SF_SNAPSHOT)
 
 /*
  * Structure used to pass around logical block paths generated by
@@ -176,7 +177,6 @@
 /* Determine if soft dependencies are being done */
 #define DOINGSOFTDEP(vp)   ((vp)->v_mount->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
 #define MOUNTEDSOFTDEP(mp) ((mp)->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
-#define DOINGASYNC(vp)	   ((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC)
 #define DOINGSUJ(vp)	   ((vp)->v_mount->mnt_flag & MNT_SUJ)
 #define MOUNTEDSUJ(mp)	   ((mp)->mnt_flag & MNT_SUJ)
 
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/ufs_acl.c
--- a/head/sys/ufs/ufs/ufs_acl.c	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/ufs_acl.c	Fri Mar 02 17:10:47 2012 +0200
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_acl.c 231122 2012-02-07 09:51:41Z kib $");
 
 #include "opt_ufs.h"
 #include "opt_quota.h"
@@ -422,7 +422,8 @@
 
 	VN_KNOTE_UNLOCKED(vp, NOTE_ATTRIB);
 
-	return (0);
+	error = UFS_UPDATE(vp, 0);
+	return (error);
 }
 
 static int
@@ -591,10 +592,11 @@
 		 */
 		ufs_sync_inode_from_acl(ap->a_aclp, ip);
 		ip->i_flag |= IN_CHANGE;
+		error = UFS_UPDATE(ap->a_vp, 0);
 	}
 
 	VN_KNOTE_UNLOCKED(ap->a_vp, NOTE_ATTRIB);
-	return (0);
+	return (error);
 }
 
 int
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/ufs_lookup.c
--- a/head/sys/ufs/ufs/ufs_lookup.c	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/ufs_lookup.c	Fri Mar 02 17:10:47 2012 +0200
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_lookup.c 222954 2011-06-10 22:18:25Z jeff $");
+__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_lookup.c 231949 2012-02-21 01:05:12Z kib $");
 
 #include "opt_ufs.h"
 #include "opt_quota.h"
@@ -1337,7 +1337,8 @@
 	doff_t off;
 	struct dirtemplate dbuf;
 	struct direct *dp = (struct direct *)&dbuf;
-	int error, count, namlen;
+	int error, namlen;
+	ssize_t count;
 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
 
 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/ufs_quota.c
--- a/head/sys/ufs/ufs/ufs_quota.c	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/ufs_quota.c	Fri Mar 02 17:10:47 2012 +0200
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_quota.c 229828 2012-01-08 23:06:53Z kib $");
+__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_quota.c 232003 2012-02-22 20:03:51Z kib $");
 
 #include "opt_ffs.h"
 
@@ -1469,6 +1469,7 @@
 	if (dq == NODQUOT)
 		return;
 	DQH_LOCK();
+	KASSERT(dq->dq_cnt > 0, ("Lost dq %p reference 1", dq));
 	if (dq->dq_cnt > 1) {
 		dq->dq_cnt--;
 		DQH_UNLOCK();
@@ -1479,6 +1480,7 @@
 	(void) dqsync(vp, dq);
 
 	DQH_LOCK();
+	KASSERT(dq->dq_cnt > 0, ("Lost dq %p reference 2", dq));
 	if (--dq->dq_cnt > 0)
 	{
 		DQH_UNLOCK();
@@ -1658,6 +1660,7 @@
 	 */
 	found = 0;
 	ip = VTOI(vp);
+	mtx_lock(&dqhlock);
 	for (i = 0; i < MAXQUOTAS; i++) {
 		if ((dq = ip->i_dquot[i]) == NODQUOT)
 			continue;
@@ -1665,6 +1668,7 @@
 		qrp[i] = dq;
 		found++;
 	}
+	mtx_unlock(&dqhlock);
 	return (found);
 }
 
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/ufs_vnops.c
--- a/head/sys/ufs/ufs/ufs_vnops.c	Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/ufs_vnops.c	Fri Mar 02 17:10:47 2012 +0200
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 226971 2011-10-31 20:24:33Z pho $");
+__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 231122 2012-02-07 09:51:41Z kib $");
 
 #include "opt_quota.h"
 #include "opt_suiddir.h"
@@ -571,8 +571,9 @@
 			DIP_SET(ip, i_flags, ip->i_flags);
 		}
 		ip->i_flag |= IN_CHANGE;
+		error = UFS_UPDATE(vp, 0);
 		if (vap->va_flags & (IMMUTABLE | APPEND))
-			return (0);
+			return (error);
 	}
 	if (ip->i_flags & (IMMUTABLE | APPEND))
 		return (EPERM);
@@ -738,6 +739,9 @@
 	VI_LOCK(vp);
 	ip->i_flag |= IN_ACCESS;
 	VI_UNLOCK(vp);
+	/*
+	 * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there.
+	 */
 	return (0);
 }
 
@@ -794,6 +798,9 @@
 	if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0)
 		error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td);
 #endif
+	if (error == 0 && (ip->i_flag & IN_CHANGE) != 0)
+		error = UFS_UPDATE(vp, 0);
+
 	return (error);
 }
 
@@ -912,7 +919,8 @@
 			DIP_SET(ip, i_mode, ip->i_mode);
 		}
 	}
-	return (0);
+	error = UFS_UPDATE(vp, 0);
+	return (error);
 }
 
 static int
@@ -2079,6 +2087,7 @@
 		dp->i_nlink--;
 		DIP_SET(dp, i_nlink, dp->i_nlink);
 		dp->i_flag |= IN_CHANGE;
+		error = UFS_UPDATE(dvp, 0);
 		ip->i_nlink--;
 		DIP_SET(ip, i_nlink, ip->i_nlink);
 		ip->i_flag |= IN_CHANGE;
@@ -2122,6 +2131,7 @@
 		ip->i_size = len;
 		DIP_SET(ip, i_size, len);
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		error = UFS_UPDATE(vp, 0);
 	} else
 		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
 		    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,


More information about the Zrouter-src-freebsd mailing list