[Zrouter-src-freebsd] ZRouter.org: push to FreeBSD HEAD tree
zrouter-src-freebsd at zrouter.org
zrouter-src-freebsd at zrouter.org
Fri Mar 2 15:39:22 UTC 2012
details: http://zrouter.org/hg/FreeBSD/head//rev/37083b471c7e
changeset: 384:37083b471c7e
user: ray at terran.dlink.ua
date: Fri Mar 02 17:10:47 2012 +0200
description:
Update to FreeBSD-HEAD @232391
diffstat:
head/sys/ufs/ffs/ffs_extern.h | 9 ++-
head/sys/ufs/ffs/ffs_inode.c | 51 +++++++++++++----
head/sys/ufs/ffs/ffs_snapshot.c | 58 +++++++++++---------
head/sys/ufs/ffs/ffs_softdep.c | 113 ++++++++++++++++++++++++++-------------
head/sys/ufs/ffs/ffs_vfsops.c | 22 +++++--
head/sys/ufs/ffs/ffs_vnops.c | 58 ++++++++++++++------
head/sys/ufs/ufs/inode.h | 4 +-
head/sys/ufs/ufs/ufs_acl.c | 8 +-
head/sys/ufs/ufs/ufs_lookup.c | 5 +-
head/sys/ufs/ufs/ufs_quota.c | 6 +-
head/sys/ufs/ufs/ufs_vnops.c | 16 ++++-
11 files changed, 236 insertions(+), 114 deletions(-)
diffs (962 lines):
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_extern.h
--- a/head/sys/ufs/ffs/ffs_extern.h Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_extern.h Fri Mar 02 17:10:47 2012 +0200
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95
- * $FreeBSD: head/sys/ufs/ffs/ffs_extern.h 225166 2011-08-25 08:17:39Z mm $
+ * $FreeBSD: head/sys/ufs/ffs/ffs_extern.h 232351 2012-03-01 18:45:25Z mckusick $
*/
#ifndef _UFS_FFS_EXTERN_H
@@ -167,6 +167,13 @@
#define FLUSH_INODES_WAIT 2
#define FLUSH_BLOCKS 3
#define FLUSH_BLOCKS_WAIT 4
+/*
+ * Flag to ffs_syncvnode() to request flushing of data only,
+ * but skip the ffs_update() on the inode itself. Used to avoid
+ * deadlock when flushing snapshot inodes while holding snaplk.
+ * Avoid bit conflicts with MNT_WAIT values in sys/mount.h
+ */
+#define NO_INO_UPDT 0x10
int ffs_rdonly(struct inode *);
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_inode.c
--- a/head/sys/ufs/ffs/ffs_inode.c Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_inode.c Fri Mar 02 17:10:47 2012 +0200
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_inode.c 225166 2011-08-25 08:17:39Z mm $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_inode.c 232351 2012-03-01 18:45:25Z mckusick $");
#include "opt_quota.h"
@@ -81,7 +81,7 @@
struct fs *fs;
struct buf *bp;
struct inode *ip;
- int error;
+ int flags, error;
ASSERT_VOP_ELOCKED(vp, "ffs_update");
ufs_itimes(vp);
@@ -92,11 +92,36 @@
fs = ip->i_fs;
if (fs->fs_ronly && ip->i_ump->um_fsckpid == 0)
return (0);
- error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
- (int)fs->fs_bsize, NOCRED, &bp);
- if (error) {
- brelse(bp);
- return (error);
+ /*
+ * If we are updating a snapshot and another process is currently
+ * writing the buffer containing the inode for this snapshot then
+ * a deadlock can occur when it tries to check the snapshot to see
+ * if that block needs to be copied. Thus when updating a snapshot
+ * we check to see if the buffer is already locked, and if it is
+ * we drop the snapshot lock until the buffer has been written
+ * and is available to us. We have to grab a reference to the
+ * snapshot vnode to prevent it from being removed while we are
+ * waiting for the buffer.
+ */
+ flags = 0;
+ if (IS_SNAPSHOT(ip))
+ flags = GB_LOCK_NOWAIT;
+ error = breadn_flags(ip->i_devvp,
+ fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+ (int) fs->fs_bsize, 0, 0, 0, NOCRED, flags, &bp);
+ if (error != 0) {
+ if (error != EBUSY) {
+ brelse(bp);
+ return (error);
+ }
+ KASSERT((IS_SNAPSHOT(ip)), ("EBUSY from non-snapshot"));
+ vref(vp); /* Protect against ffs_snapgone() */
+ VOP_UNLOCK(vp, 0);
+ (void) bread(ip->i_devvp,
+ fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+ (int) fs->fs_bsize, NOCRED, &bp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ vrele(vp);
}
if (DOINGSOFTDEP(vp))
softdep_update_inodeblock(ip, bp, waitfor);
@@ -108,16 +133,16 @@
else
*((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
- if (waitfor && !DOINGASYNC(vp)) {
- return (bwrite(bp));
- } else if (vm_page_count_severe() || buf_dirty_count_severe()) {
- return (bwrite(bp));
+ if ((waitfor && !DOINGASYNC(vp)) ||
+ (vm_page_count_severe() || buf_dirty_count_severe())) {
+ error = bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
bdwrite(bp);
- return (0);
+ error = 0;
}
+ return (error);
}
#define SINGLE 0 /* index of single indirect block */
@@ -253,7 +278,7 @@
}
if (fs->fs_ronly)
panic("ffs_truncate: read-only filesystem");
- if ((ip->i_flags & SF_SNAPSHOT) != 0)
+ if (IS_SNAPSHOT(ip))
ffs_snapremove(vp);
vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
osize = ip->i_size;
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_snapshot.c
--- a/head/sys/ufs/ffs/ffs_snapshot.c Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_snapshot.c Fri Mar 02 17:10:47 2012 +0200
@@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_snapshot.c 230250 2012-01-17 01:14:56Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_snapshot.c 232351 2012-03-01 18:45:25Z mckusick $");
#include "opt_quota.h"
@@ -203,6 +203,7 @@
ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
int error, cg, snaploc;
int i, size, len, loc;
+ ufs2_daddr_t blockno;
uint64_t flag;
struct timespec starttime = {0, 0}, endtime;
char saved_nice = 0;
@@ -529,7 +530,7 @@
(xvp->v_usecount == 0 &&
(xvp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == 0) ||
xvp->v_type == VNON ||
- (VTOI(xvp)->i_flags & SF_SNAPSHOT)) {
+ IS_SNAPSHOT(VTOI(xvp))) {
VI_UNLOCK(xvp);
MNT_ILOCK(mp);
continue;
@@ -815,21 +816,26 @@
if (space != NULL)
free(space, M_UFSMNT);
/*
- * If another process is currently writing the buffer containing
- * the inode for this snapshot then a deadlock can occur. Drop
- * the snapshot lock until the buffer has been written.
+ * Preallocate all the direct blocks in the snapshot inode so
+ * that we never have to write the inode itself to commit an
+ * update to the contents of the snapshot. Note that once
+ * created, the size of the snapshot will never change, so
+ * there will never be a need to write the inode except to
+ * update the non-integrity-critical time fields and
+ * allocated-block count.
*/
- VREF(vp); /* Protect against ffs_snapgone() */
- VOP_UNLOCK(vp, 0);
- (void) bread(ip->i_devvp,
- fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
- (int) fs->fs_bsize, NOCRED, &nbp);
- brelse(nbp);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- if (ip->i_effnlink == 0)
- error = ENOENT; /* Snapshot file unlinked */
- else
- vrele(vp); /* Drop extra reference */
+ for (blockno = 0; blockno < NDADDR; blockno++) {
+ if (DIP(ip, i_db[blockno]) != 0)
+ continue;
+ error = UFS_BALLOC(vp, lblktosize(fs, blockno),
+ fs->fs_bsize, KERNCRED, BA_CLRBUF, &bp);
+ if (error)
+ break;
+ error = readblock(vp, bp, blockno);
+ bawrite(bp);
+ if (error != 0)
+ break;
+ }
done:
free(copy_fs->fs_csp, M_UFSMNT);
free(copy_fs, M_UFSMNT);
@@ -1902,7 +1908,7 @@
bawrite(cbp);
if ((vtype == VDIR || dopersistence) &&
ip->i_effnlink > 0)
- (void) ffs_syncvnode(vp, MNT_WAIT);
+ (void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
continue;
}
/*
@@ -1913,7 +1919,7 @@
bawrite(cbp);
if ((vtype == VDIR || dopersistence) &&
ip->i_effnlink > 0)
- (void) ffs_syncvnode(vp, MNT_WAIT);
+ (void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
break;
}
savedcbp = cbp;
@@ -1931,7 +1937,7 @@
bawrite(savedcbp);
if ((vtype == VDIR || dopersistence) &&
VTOI(vp)->i_effnlink > 0)
- (void) ffs_syncvnode(vp, MNT_WAIT);
+ (void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
}
/*
* If we have been unable to allocate a block in which to do
@@ -1987,9 +1993,9 @@
continue;
}
ip = VTOI(vp);
- if ((ip->i_flags & SF_SNAPSHOT) == 0 || ip->i_size ==
+ if (!IS_SNAPSHOT(ip) || ip->i_size ==
lblktosize(fs, howmany(fs->fs_size, fs->fs_frag))) {
- if ((ip->i_flags & SF_SNAPSHOT) == 0) {
+ if (!IS_SNAPSHOT(ip)) {
reason = "non-snapshot";
} else {
reason = "old format snapshot";
@@ -2250,7 +2256,7 @@
int launched_async_io, prev_norunningbuf;
long saved_runningbufspace;
- if (devvp != bp->b_vp && (VTOI(bp->b_vp)->i_flags & SF_SNAPSHOT) != 0)
+ if (devvp != bp->b_vp && IS_SNAPSHOT(VTOI(bp->b_vp)))
return (0); /* Update on a snapshot file */
if (td->td_pflags & TDP_COWINPROGRESS)
panic("ffs_copyonwrite: recursive call");
@@ -2395,7 +2401,7 @@
bawrite(cbp);
if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
dopersistence) && ip->i_effnlink > 0)
- (void) ffs_syncvnode(vp, MNT_WAIT);
+ (void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
else
launched_async_io = 1;
continue;
@@ -2408,7 +2414,7 @@
bawrite(cbp);
if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
dopersistence) && ip->i_effnlink > 0)
- (void) ffs_syncvnode(vp, MNT_WAIT);
+ (void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
else
launched_async_io = 1;
break;
@@ -2428,7 +2434,7 @@
bawrite(savedcbp);
if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR ||
dopersistence) && VTOI(vp)->i_effnlink > 0)
- (void) ffs_syncvnode(vp, MNT_WAIT);
+ (void) ffs_syncvnode(vp, MNT_WAIT|NO_INO_UPDT);
else
launched_async_io = 1;
}
@@ -2478,7 +2484,7 @@
}
TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) {
vp = ITOV(ip);
- ffs_syncvnode(vp, waitfor);
+ ffs_syncvnode(vp, waitfor|NO_INO_UPDT);
}
lockmgr(&sn->sn_lock, LK_RELEASE, NULL);
}
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_softdep.c
--- a/head/sys/ufs/ffs/ffs_softdep.c Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_softdep.c Fri Mar 02 17:10:47 2012 +0200
@@ -40,9 +40,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_softdep.c 227309 2011-11-07 15:43:11Z ed $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_softdep.c 232351 2012-03-01 18:45:25Z mckusick $");
#include "opt_ffs.h"
+#include "opt_quota.h"
#include "opt_ddb.h"
/*
@@ -2826,7 +2827,12 @@
{
struct ufsmount *ump;
- if (DOINGSUJ(vp) == 0)
+ /*
+ * Nothing to do if we are not running journaled soft updates.
+ * If we currently hold the snapshot lock, we must avoid handling
+ * other resources that could cause deadlock.
+ */
+ if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)))
return (0);
ump = VFSTOUFS(vp->v_mount);
ACQUIRE_LOCK(&lk);
@@ -2872,7 +2878,12 @@
ump = VFSTOUFS(dvp->v_mount);
mtx_assert(&lk, MA_OWNED);
- if (journal_space(ump, 0))
+ /*
+ * Nothing to do if we have sufficient journal space.
+ * If we currently hold the snapshot lock, we must avoid
+ * handling other resources that could cause deadlock.
+ */
+ if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp))))
return;
stat_journal_low++;
FREE_LOCK(&lk);
@@ -4303,11 +4314,15 @@
struct inode *ip;
{
struct inodedep *inodedep;
+ int dflags;
KASSERT(ip->i_nlink >= ip->i_effnlink,
("inodedep_lookup_ip: bad delta"));
- (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number,
- DEPALLOC, &inodedep);
+ dflags = DEPALLOC;
+ if (IS_SNAPSHOT(ip))
+ dflags |= NODELAY;
+ (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags,
+ &inodedep);
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
return (inodedep);
@@ -4695,7 +4710,7 @@
* the cylinder group map from which it was allocated.
*/
ACQUIRE_LOCK(&lk);
- if ((inodedep_lookup(mp, newinum, DEPALLOC|NODELAY, &inodedep)))
+ if ((inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep)))
panic("softdep_setup_inomapdep: dependency %p for new"
"inode already exists", inodedep);
bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, newinum));
@@ -5436,6 +5451,7 @@
struct allocindir *aip;
struct pagedep *pagedep;
struct mount *mp;
+ int dflags;
if (lbn != nbp->b_lblkno)
panic("softdep_setup_allocindir_page: lbn %jd != lblkno %jd",
@@ -5443,7 +5459,10 @@
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page");
mp = UFSTOVFS(ip->i_ump);
aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn);
- (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+ dflags = DEPALLOC;
+ if (IS_SNAPSHOT(ip))
+ dflags |= NODELAY;
+ (void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
/*
* If we are allocating a directory page, then we must
* allocate an associated pagedep to track additions and
@@ -5473,11 +5492,15 @@
struct inodedep *inodedep;
struct allocindir *aip;
ufs_lbn_t lbn;
+ int dflags;
lbn = nbp->b_lblkno;
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta");
aip = newallocindir(ip, ptrno, newblkno, 0, lbn);
- inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep);
+ dflags = DEPALLOC;
+ if (IS_SNAPSHOT(ip))
+ dflags |= NODELAY;
+ inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags, &inodedep);
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list);
if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn))
panic("softdep_setup_allocindir_meta: Block already existed");
@@ -6084,11 +6107,7 @@
struct mount *mp;
ufs2_daddr_t extblocks, datablocks;
ufs_lbn_t tmpval, lbn, lastlbn;
- int frags;
- int lastoff, iboff;
- int allocblock;
- int error, i;
- int needj;
+ int frags, lastoff, iboff, allocblock, needj, dflags, error, i;
fs = ip->i_fs;
mp = UFSTOVFS(ip->i_ump);
@@ -6106,7 +6125,10 @@
* we don't need to journal the block frees. The canceled journals
* for the allocations will suffice.
*/
- inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+ dflags = DEPALLOC;
+ if (IS_SNAPSHOT(ip))
+ dflags |= NODELAY;
+ inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED &&
length == 0)
needj = 0;
@@ -6231,7 +6253,7 @@
*((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
ACQUIRE_LOCK(&lk);
- (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+ (void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
@@ -6309,7 +6331,7 @@
}
ACQUIRE_LOCK(&lk);
- inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+ inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next);
freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST;
/*
@@ -6397,7 +6419,7 @@
struct fs *fs;
ufs2_daddr_t extblocks, datablocks;
struct mount *mp;
- int i, delay, error;
+ int i, delay, error, dflags;
ufs_lbn_t tmpval;
ufs_lbn_t lbn;
@@ -6428,7 +6450,7 @@
}
#ifdef QUOTA
/* Reference the quotas in case the block count is wrong in the end. */
- quotaref(vp, freeblks->fb_quota);
+ quotaref(ITOV(ip), freeblks->fb_quota);
(void) chkdq(ip, -datablocks, NOCRED, 0);
#endif
freeblks->fb_chkcnt = -datablocks;
@@ -6462,7 +6484,10 @@
* Find and eliminate any inode dependencies.
*/
ACQUIRE_LOCK(&lk);
- (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
+ dflags = DEPALLOC;
+ if (IS_SNAPSHOT(ip))
+ dflags |= NODELAY;
+ (void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
@@ -7160,13 +7185,16 @@
mtx_assert(&lk, MA_OWNED);
if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) != 0 ||
+ !LIST_EMPTY(&inodedep->id_dirremhd) ||
!LIST_EMPTY(&inodedep->id_pendinghd) ||
!LIST_EMPTY(&inodedep->id_bufwait) ||
!LIST_EMPTY(&inodedep->id_inowait) ||
+ !TAILQ_EMPTY(&inodedep->id_inoreflst) ||
!TAILQ_EMPTY(&inodedep->id_inoupdt) ||
!TAILQ_EMPTY(&inodedep->id_newinoupdt) ||
!TAILQ_EMPTY(&inodedep->id_extupdt) ||
!TAILQ_EMPTY(&inodedep->id_newextupdt) ||
+ !TAILQ_EMPTY(&inodedep->id_freeblklst) ||
inodedep->id_mkdiradd != NULL ||
inodedep->id_nlinkdelta != 0)
return (0);
@@ -8025,7 +8053,7 @@
dap->da_pagedep = pagedep;
LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
da_pdlist);
- inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
+ inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep);
/*
* If we're journaling, link the diradd into the jaddref so it
* may be completed after the journal entry is written. Otherwise,
@@ -8627,8 +8655,7 @@
* the number of freefile and freeblks structures.
*/
ACQUIRE_LOCK(&lk);
- if (!(ip->i_flags & SF_SNAPSHOT) &&
- dep_current[D_DIRREM] > max_softdeps / 2)
+ if (!IS_SNAPSHOT(ip) && dep_current[D_DIRREM] > max_softdeps / 2)
(void) request_cleanup(ITOV(dp)->v_mount, FLUSH_BLOCKS);
FREE_LOCK(&lk);
dirrem = malloc(sizeof(struct dirrem),
@@ -8862,11 +8889,11 @@
/*
* Lookup the jaddref for this journal entry. We must finish
* initializing it and make the diradd write dependent on it.
- * If we're not journaling Put it on the id_bufwait list if the inode
- * is not yet written. If it is written, do the post-inode write
- * processing to put it on the id_pendinghd list.
- */
- inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
+ * If we're not journaling, put it on the id_bufwait list if the
+ * inode is not yet written. If it is written, do the post-inode
+ * write processing to put it on the id_pendinghd list.
+ */
+ inodedep_lookup(mp, newinum, DEPALLOC | NODELAY, &inodedep);
if (MOUNTEDSUJ(mp)) {
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
inoreflst);
@@ -8908,9 +8935,13 @@
struct inode *ip; /* the inode with the increased link count */
{
struct inodedep *inodedep;
+ int dflags;
ACQUIRE_LOCK(&lk);
- inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep);
+ dflags = DEPALLOC;
+ if (IS_SNAPSHOT(ip))
+ dflags |= NODELAY;
+ inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags, &inodedep);
if (ip->i_nlink < ip->i_effnlink)
panic("softdep_change_linkcnt: bad delta");
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
@@ -12112,6 +12143,7 @@
case D_FREEWORK:
case D_FREEDEP:
case D_JSEGDEP:
+ case D_JNEWBLK:
continue;
default:
@@ -12539,22 +12571,25 @@
ufs2_daddr_t needed;
int error;
- mp = vp->v_mount;
- ump = VFSTOUFS(mp);
- mtx_assert(UFS_MTX(ump), MA_OWNED);
+ /*
+ * If we are being called because of a process doing a
+ * copy-on-write, then it is not safe to process any
+ * worklist items as we will recurse into the copyonwrite
+ * routine. This will result in an incoherent snapshot.
+ * If the vnode that we hold is a snapshot, we must avoid
+ * handling other resources that could cause deadlock.
+ */
+ if ((curthread->td_pflags & TDP_COWINPROGRESS) || IS_SNAPSHOT(VTOI(vp)))
+ return (0);
+
if (resource == FLUSH_BLOCKS_WAIT)
stat_cleanup_blkrequests += 1;
else
stat_cleanup_inorequests += 1;
- /*
- * If we are being called because of a process doing a
- * copy-on-write, then it is not safe to process any
- * worklist items as we will recurse into the copyonwrite
- * routine. This will result in an incoherent snapshot.
- */
- if (curthread->td_pflags & TDP_COWINPROGRESS)
- return (0);
+ mp = vp->v_mount;
+ ump = VFSTOUFS(mp);
+ mtx_assert(UFS_MTX(ump), MA_OWNED);
UFS_UNLOCK(ump);
error = ffs_update(vp, 1);
if (error != 0) {
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_vfsops.c
--- a/head/sys/ufs/ffs/ffs_vfsops.c Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_vfsops.c Fri Mar 02 17:10:47 2012 +0200
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 230249 2012-01-17 01:08:01Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 231160 2012-02-07 20:43:28Z mckusick $");
#include "opt_quota.h"
#include "opt_ufs.h"
@@ -1436,17 +1436,26 @@
int softdep_accdeps;
struct bufobj *bo;
+ wait = 0;
+ suspend = 0;
+ suspended = 0;
td = curthread;
fs = ump->um_fs;
if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
panic("%s: ffs_sync: modification on read-only filesystem",
fs->fs_fsmnt);
/*
+ * For a lazy sync, we just care about the filesystem metadata.
+ */
+ if (waitfor == MNT_LAZY) {
+ secondary_accwrites = 0;
+ secondary_writes = 0;
+ lockreq = 0;
+ goto metasync;
+ }
+ /*
* Write back each (modified) inode.
*/
- wait = 0;
- suspend = 0;
- suspended = 0;
lockreq = LK_EXCLUSIVE | LK_NOWAIT;
if (waitfor == MNT_SUSPEND) {
suspend = 1;
@@ -1517,11 +1526,12 @@
#ifdef QUOTA
qsync(mp);
#endif
+
+metasync:
devvp = ump->um_devvp;
bo = &devvp->v_bufobj;
BO_LOCK(bo);
- if (waitfor != MNT_LAZY &&
- (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
+ if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
BO_UNLOCK(bo);
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ffs/ffs_vnops.c
--- a/head/sys/ufs/ffs/ffs_vnops.c Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ffs/ffs_vnops.c Fri Mar 02 17:10:47 2012 +0200
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vnops.c 224503 2011-07-30 00:43:18Z mckusick $");
+__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vnops.c 232351 2012-03-01 18:45:25Z mckusick $");
#include <sys/param.h>
#include <sys/bio.h>
@@ -216,8 +216,10 @@
struct buf *bp;
struct buf *nbp;
ufs_lbn_t lbn;
- int error, wait, passes;
+ int error, wait, passes, noupdate;
+ noupdate = waitfor & NO_INO_UPDT;
+ waitfor &= ~NO_INO_UPDT;
ip = VTOI(vp);
ip->i_flag &= ~IN_NEEDSYNC;
bo = &vp->v_bufobj;
@@ -300,7 +302,10 @@
}
if (waitfor != MNT_WAIT) {
BO_UNLOCK(bo);
- return (ffs_update(vp, waitfor));
+ if (noupdate)
+ return (0);
+ else
+ return (ffs_update(vp, waitfor));
}
/* Drain IO to see if we're done. */
bufobj_wwait(bo, 0, 0);
@@ -317,7 +322,7 @@
*/
if (bo->bo_dirty.bv_cnt > 0) {
/* Write the inode after sync passes to flush deps. */
- if (wait && DOINGSOFTDEP(vp)) {
+ if (wait && DOINGSOFTDEP(vp) && noupdate == 0) {
BO_UNLOCK(bo);
ffs_update(vp, MNT_WAIT);
BO_LOCK(bo);
@@ -332,7 +337,9 @@
#endif
}
BO_UNLOCK(bo);
- error = ffs_update(vp, MNT_WAIT);
+ error = 0;
+ if (noupdate == 0)
+ error = ffs_update(vp, MNT_WAIT);
if (DOINGSUJ(vp))
softdep_journal_fsync(VTOI(vp));
return (error);
@@ -420,7 +427,8 @@
ufs_lbn_t lbn, nextlbn;
off_t bytesinfile;
long size, xfersize, blkoffset;
- int error, orig_resid;
+ ssize_t orig_resid;
+ int error;
int seqcount;
int ioflag;
@@ -633,8 +641,9 @@
struct buf *bp;
ufs_lbn_t lbn;
off_t osize;
+ ssize_t resid;
int seqcount;
- int blkoffset, error, flags, ioflag, resid, size, xfersize;
+ int blkoffset, error, flags, ioflag, size, xfersize;
vp = ap->a_vp;
uio = ap->a_uio;
@@ -718,15 +727,6 @@
vnode_pager_setsize(vp, ip->i_size);
break;
}
- /*
- * If the buffer is not valid we have to clear out any
- * garbage data from the pages instantiated for the buffer.
- * If we do not, a failed uiomove() during a write can leave
- * the prior contents of the pages exposed to a userland
- * mmap(). XXX deal with uiomove() errors a better way.
- */
- if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
- vfs_bio_clrbuf(bp);
if (ioflag & IO_DIRECT)
bp->b_flags |= B_DIRECT;
if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
@@ -743,6 +743,26 @@
error =
uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+ /*
+ * If the buffer is not already filled and we encounter an
+ * error while trying to fill it, we have to clear out any
+ * garbage data from the pages instantiated for the buffer.
+ * If we do not, a failed uiomove() during a write can leave
+ * the prior contents of the pages exposed to a userland mmap.
+ *
+ * Note that we need only clear buffers with a transfer size
+ * equal to the block size because buffers with a shorter
+ * transfer size were cleared above by the call to UFS_BALLOC()
+ * with the BA_CLRBUF flag set.
+ *
+ * If the source region for uiomove identically mmaps the
+ * buffer, uiomove() performed the NOP copy, and the buffer
+ * content remains valid because the page fault handler
+ * validated the pages.
+ */
+ if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
+ fs->fs_bsize == xfersize)
+ vfs_bio_clrbuf(bp);
if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
(LIST_EMPTY(&bp->b_dep))) {
bp->b_flags |= B_RELBUF;
@@ -860,7 +880,8 @@
ufs_lbn_t lbn, nextlbn;
off_t bytesinfile;
long size, xfersize, blkoffset;
- int error, orig_resid;
+ ssize_t orig_resid;
+ int error;
ip = VTOI(vp);
fs = ip->i_fs;
@@ -1013,7 +1034,8 @@
struct buf *bp;
ufs_lbn_t lbn;
off_t osize;
- int blkoffset, error, flags, resid, size, xfersize;
+ ssize_t resid;
+ int blkoffset, error, flags, size, xfersize;
ip = VTOI(vp);
fs = ip->i_fs;
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/inode.h
--- a/head/sys/ufs/ufs/inode.h Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/inode.h Fri Mar 02 17:10:47 2012 +0200
@@ -32,7 +32,7 @@
* SUCH DAMAGE.
*
* @(#)inode.h 8.9 (Berkeley) 5/14/95
- * $FreeBSD: head/sys/ufs/ufs/inode.h 224503 2011-07-30 00:43:18Z mckusick $
+ * $FreeBSD: head/sys/ufs/ufs/inode.h 232351 2012-03-01 18:45:25Z mckusick $
*/
#ifndef _UFS_UFS_INODE_H_
@@ -158,6 +158,7 @@
#define SHORTLINK(ip) \
(((ip)->i_ump->um_fstype == UFS1) ? \
(caddr_t)(ip)->i_din1->di_db : (caddr_t)(ip)->i_din2->di_db)
+#define IS_SNAPSHOT(ip) ((ip)->i_flags & SF_SNAPSHOT)
/*
* Structure used to pass around logical block paths generated by
@@ -176,7 +177,6 @@
/* Determine if soft dependencies are being done */
#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
#define MOUNTEDSOFTDEP(mp) ((mp)->mnt_flag & (MNT_SOFTDEP | MNT_SUJ))
-#define DOINGASYNC(vp) ((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC)
#define DOINGSUJ(vp) ((vp)->v_mount->mnt_flag & MNT_SUJ)
#define MOUNTEDSUJ(mp) ((mp)->mnt_flag & MNT_SUJ)
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/ufs_acl.c
--- a/head/sys/ufs/ufs/ufs_acl.c Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/ufs_acl.c Fri Mar 02 17:10:47 2012 +0200
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_acl.c 231122 2012-02-07 09:51:41Z kib $");
#include "opt_ufs.h"
#include "opt_quota.h"
@@ -422,7 +422,8 @@
VN_KNOTE_UNLOCKED(vp, NOTE_ATTRIB);
- return (0);
+ error = UFS_UPDATE(vp, 0);
+ return (error);
}
static int
@@ -591,10 +592,11 @@
*/
ufs_sync_inode_from_acl(ap->a_aclp, ip);
ip->i_flag |= IN_CHANGE;
+ error = UFS_UPDATE(ap->a_vp, 0);
}
VN_KNOTE_UNLOCKED(ap->a_vp, NOTE_ATTRIB);
- return (0);
+ return (error);
}
int
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/ufs_lookup.c
--- a/head/sys/ufs/ufs/ufs_lookup.c Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/ufs_lookup.c Fri Mar 02 17:10:47 2012 +0200
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_lookup.c 222954 2011-06-10 22:18:25Z jeff $");
+__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_lookup.c 231949 2012-02-21 01:05:12Z kib $");
#include "opt_ufs.h"
#include "opt_quota.h"
@@ -1337,7 +1337,8 @@
doff_t off;
struct dirtemplate dbuf;
struct direct *dp = (struct direct *)&dbuf;
- int error, count, namlen;
+ int error, namlen;
+ ssize_t count;
#define MINDIRSIZ (sizeof (struct dirtemplate) / 2)
for (off = 0; off < ip->i_size; off += dp->d_reclen) {
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/ufs_quota.c
--- a/head/sys/ufs/ufs/ufs_quota.c Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/ufs_quota.c Fri Mar 02 17:10:47 2012 +0200
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_quota.c 229828 2012-01-08 23:06:53Z kib $");
+__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_quota.c 232003 2012-02-22 20:03:51Z kib $");
#include "opt_ffs.h"
@@ -1469,6 +1469,7 @@
if (dq == NODQUOT)
return;
DQH_LOCK();
+ KASSERT(dq->dq_cnt > 0, ("Lost dq %p reference 1", dq));
if (dq->dq_cnt > 1) {
dq->dq_cnt--;
DQH_UNLOCK();
@@ -1479,6 +1480,7 @@
(void) dqsync(vp, dq);
DQH_LOCK();
+ KASSERT(dq->dq_cnt > 0, ("Lost dq %p reference 2", dq));
if (--dq->dq_cnt > 0)
{
DQH_UNLOCK();
@@ -1658,6 +1660,7 @@
*/
found = 0;
ip = VTOI(vp);
+ mtx_lock(&dqhlock);
for (i = 0; i < MAXQUOTAS; i++) {
if ((dq = ip->i_dquot[i]) == NODQUOT)
continue;
@@ -1665,6 +1668,7 @@
qrp[i] = dq;
found++;
}
+ mtx_unlock(&dqhlock);
return (found);
}
diff -r 1e39889b0785 -r 37083b471c7e head/sys/ufs/ufs/ufs_vnops.c
--- a/head/sys/ufs/ufs/ufs_vnops.c Fri Mar 02 17:10:07 2012 +0200
+++ b/head/sys/ufs/ufs/ufs_vnops.c Fri Mar 02 17:10:47 2012 +0200
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 226971 2011-10-31 20:24:33Z pho $");
+__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 231122 2012-02-07 09:51:41Z kib $");
#include "opt_quota.h"
#include "opt_suiddir.h"
@@ -571,8 +571,9 @@
DIP_SET(ip, i_flags, ip->i_flags);
}
ip->i_flag |= IN_CHANGE;
+ error = UFS_UPDATE(vp, 0);
if (vap->va_flags & (IMMUTABLE | APPEND))
- return (0);
+ return (error);
}
if (ip->i_flags & (IMMUTABLE | APPEND))
return (EPERM);
@@ -738,6 +739,9 @@
VI_LOCK(vp);
ip->i_flag |= IN_ACCESS;
VI_UNLOCK(vp);
+ /*
+	 * XXXKIB No UFS_UPDATE(ap->a_vp, 0) here.
+ */
return (0);
}
@@ -794,6 +798,9 @@
if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0)
error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td);
#endif
+ if (error == 0 && (ip->i_flag & IN_CHANGE) != 0)
+ error = UFS_UPDATE(vp, 0);
+
return (error);
}
@@ -912,7 +919,8 @@
DIP_SET(ip, i_mode, ip->i_mode);
}
}
- return (0);
+ error = UFS_UPDATE(vp, 0);
+ return (error);
}
static int
@@ -2079,6 +2087,7 @@
dp->i_nlink--;
DIP_SET(dp, i_nlink, dp->i_nlink);
dp->i_flag |= IN_CHANGE;
+ error = UFS_UPDATE(dvp, 0);
ip->i_nlink--;
DIP_SET(ip, i_nlink, ip->i_nlink);
ip->i_flag |= IN_CHANGE;
@@ -2122,6 +2131,7 @@
ip->i_size = len;
DIP_SET(ip, i_size, len);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ error = UFS_UPDATE(vp, 0);
} else
error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
More information about the Zrouter-src-freebsd
mailing list