summaryrefslogtreecommitdiffstats
path: root/kernel/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fs/xfs')
-rw-r--r--kernel/fs/xfs/libxfs/xfs_attr_remote.c44
-rw-r--r--kernel/fs/xfs/xfs_attr_inactive.c10
-rw-r--r--kernel/fs/xfs/xfs_inode.c68
-rw-r--r--kernel/fs/xfs/xfs_inode.h85
-rw-r--r--kernel/fs/xfs/xfs_log_recover.c11
-rw-r--r--kernel/fs/xfs/xfs_symlink.c2
6 files changed, 161 insertions, 59 deletions
diff --git a/kernel/fs/xfs/libxfs/xfs_attr_remote.c b/kernel/fs/xfs/libxfs/xfs_attr_remote.c
index 20de88d1b..dd714037c 100644
--- a/kernel/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/kernel/fs/xfs/libxfs/xfs_attr_remote.c
@@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ int blksize = mp->m_attr_geo->blksize;
char *ptr;
int len;
xfs_daddr_t bno;
- int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify(
ASSERT(len >= blksize);
while (len > 0) {
+ struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
+
if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
- if (bip) {
- struct xfs_attr3_rmt_hdr *rmt;
- rmt = (struct xfs_attr3_rmt_hdr *)ptr;
- rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ /*
+ * Ensure we aren't writing bogus LSNs to disk. See
+ * xfs_attr3_rmt_hdr_set() for the explanation.
+ */
+ if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
}
xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
@@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set(
rmt->rm_owner = cpu_to_be64(ino);
rmt->rm_blkno = cpu_to_be64(bno);
+ /*
+ * Remote attribute blocks are written synchronously, so we don't
+ * have an LSN that we can stamp in them that makes any sense to log
+ * recovery. To ensure that log recovery handles overwrites of these
+ * blocks sanely (i.e. once they've been freed and reallocated as some
+ * other type of metadata) we need to ensure that the LSN has a value
+ * that tells log recovery to ignore the LSN and overwrite the buffer
+ * with whatever is in it's log. To do this, we use the magic
+ * NULLCOMMITLSN to indicate that the LSN is invalid.
+ */
+ rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
+
return sizeof(struct xfs_attr3_rmt_hdr);
}
@@ -434,14 +451,21 @@ xfs_attr_rmtval_set(
/*
* Allocate a single extent, up to the size of the value.
+ *
+ * Note that we have to consider this a data allocation as we
+ * write the remote attribute without logging the contents.
+ * Hence we must ensure that we aren't using blocks that are on
+ * the busy list so that we don't overwrite blocks which have
+ * recently been freed but their transactions are not yet
+ * committed to disk. If we overwrite the contents of a busy
+ * extent and then crash then the block may not contain the
+ * correct metadata after log recovery occurs.
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
- blkcnt,
- XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
- args->firstblock, args->total, &map, &nmap,
- args->flist);
+ blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
+ args->total, &map, &nmap, args->flist);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
diff --git a/kernel/fs/xfs/xfs_attr_inactive.c b/kernel/fs/xfs/xfs_attr_inactive.c
index 3fbf167cf..73e75a87a 100644
--- a/kernel/fs/xfs/xfs_attr_inactive.c
+++ b/kernel/fs/xfs/xfs_attr_inactive.c
@@ -435,8 +435,14 @@ xfs_attr_inactive(
*/
xfs_trans_ijoin(trans, dp, 0);
- /* invalidate and truncate the attribute fork extents */
- if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+ /*
+ * Invalidate and truncate the attribute fork extents. Make sure the
+ * fork actually has attributes as otherwise the invalidation has no
+ * blocks to read and returns an error. In this case, just do the fork
+ * removal below.
+ */
+ if (xfs_inode_hasattr(dp) &&
+ dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
error = xfs_attr3_root_inactive(&trans, dp);
if (error)
goto out_cancel;
diff --git a/kernel/fs/xfs/xfs_inode.c b/kernel/fs/xfs/xfs_inode.c
index 539a85fdd..fec4bfba0 100644
--- a/kernel/fs/xfs/xfs_inode.c
+++ b/kernel/fs/xfs/xfs_inode.c
@@ -164,7 +164,7 @@ xfs_ilock(
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL)
mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
@@ -212,7 +212,7 @@ xfs_ilock_nowait(
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL) {
if (!mrtryupdate(&ip->i_iolock))
@@ -281,7 +281,7 @@ xfs_iunlock(
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
ASSERT(lock_flags != 0);
if (lock_flags & XFS_IOLOCK_EXCL)
@@ -364,30 +364,38 @@ int xfs_lock_delays;
/*
* Bump the subclass so xfs_lock_inodes() acquires each lock with a different
- * value. This shouldn't be called for page fault locking, but we also need to
- * ensure we don't overrun the number of lockdep subclasses for the iolock or
- * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
+ * value. This can be called for any type of inode lock combination, including
+ * parent locking. Care must be taken to ensure we don't overrun the subclass
+ * storage fields in the class mask we build.
*/
static inline int
xfs_lock_inumorder(int lock_mode, int subclass)
{
+ int class = 0;
+
+ ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
+ XFS_ILOCK_RTSUM)));
+
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
- ASSERT(subclass + XFS_LOCK_INUMORDER <
- (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+ ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
+ ASSERT(subclass + XFS_IOLOCK_PARENT_VAL <
+ MAX_LOCKDEP_SUBCLASSES);
+ class += subclass << XFS_IOLOCK_SHIFT;
+ if (lock_mode & XFS_IOLOCK_PARENT)
+ class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
}
if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
- ASSERT(subclass + XFS_LOCK_INUMORDER <
- (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
- lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
- XFS_MMAPLOCK_SHIFT;
+ ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
+ class += subclass << XFS_MMAPLOCK_SHIFT;
}
- if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+ if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
+ ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
+ class += subclass << XFS_ILOCK_SHIFT;
+ }
- return lock_mode;
+ return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
}
/*
@@ -399,6 +407,11 @@ xfs_lock_inumorder(int lock_mode, int subclass)
* transaction (such as truncate). This can result in deadlock since the long
* running trans might need to wait for the inode we just locked in order to
* push the tail and free space in the log.
+ *
+ * xfs_lock_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
*/
void
xfs_lock_inodes(
@@ -409,8 +422,29 @@ xfs_lock_inodes(
int attempts = 0, i, j, try_lock;
xfs_log_item_t *lp;
- /* currently supports between 2 and 5 inodes */
+ /*
+ * Currently supports between 2 and 5 inodes with exclusive locking. We
+ * support an arbitrary depth of locking here, but absolute limits on
+ * inodes depend on the the type of locking and the limits placed by
+ * lockdep annotations in xfs_lock_inumorder. These are all checked by
+ * the asserts.
+ */
ASSERT(ips && inodes >= 2 && inodes <= 5);
+ ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
+ XFS_ILOCK_EXCL));
+ ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
+ XFS_ILOCK_SHARED)));
+ ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
+ inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
+ ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
+ inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
+ ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
+ inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
+
+ if (lock_mode & XFS_IOLOCK_EXCL) {
+ ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
+ } else if (lock_mode & XFS_MMAPLOCK_EXCL)
+ ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
try_lock = 0;
i = 0;
diff --git a/kernel/fs/xfs/xfs_inode.h b/kernel/fs/xfs/xfs_inode.h
index 8f22d2036..ee26a603c 100644
--- a/kernel/fs/xfs/xfs_inode.h
+++ b/kernel/fs/xfs/xfs_inode.h
@@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
* Flags for lockdep annotations.
*
* XFS_LOCK_PARENT - for directory operations that require locking a
- * parent directory inode and a child entry inode. The parent gets locked
- * with this flag so it gets a lockdep subclass of 1 and the child entry
- * lock will have a lockdep subclass of 0.
+ * parent directory inode and a child entry inode. IOLOCK requires nesting,
+ * MMAPLOCK does not support this class, ILOCK requires a single subclass
+ * to differentiate parent from child.
*
* XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
* inodes do not participate in the normal lock order, and thus have their
@@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
* XFS_LOCK_INUMORDER - for locking several inodes at the some time
* with xfs_lock_inodes(). This flag is used as the starting subclass
* and each subsequent lock acquired will increment the subclass by one.
- * So the first lock acquired will have a lockdep subclass of 4, the
- * second lock will have a lockdep subclass of 5, and so on. It is
- * the responsibility of the class builder to shift this to the correct
- * portion of the lock_mode lockdep mask.
+ * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
+ * limited to the subclasses we can represent via nesting. We need at least
+ * 5 inodes nest depth for the ILOCK through rename, and we also have to support
+ * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
+ * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
+ * 8 subclasses supported by lockdep.
+ *
+ * This also means we have to number the sub-classes in the lowest bits of
+ * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
+ * mask and we can't use bit-masking to build the subclasses. What a mess.
+ *
+ * Bit layout:
+ *
+ * Bit Lock Region
+ * 16-19 XFS_IOLOCK_SHIFT dependencies
+ * 20-23 XFS_MMAPLOCK_SHIFT dependencies
+ * 24-31 XFS_ILOCK_SHIFT dependencies
+ *
+ * IOLOCK values
+ *
+ * 0-3 subclass value
+ * 4-7 PARENT subclass values
+ *
+ * MMAPLOCK values
+ *
+ * 0-3 subclass value
+ * 4-7 unused
+ *
+ * ILOCK values
+ * 0-4 subclass values
+ * 5 PARENT subclass (not nestable)
+ * 6 RTBITMAP subclass (not nestable)
+ * 7 RTSUM subclass (not nestable)
+ *
*/
-#define XFS_LOCK_PARENT 1
-#define XFS_LOCK_RTBITMAP 2
-#define XFS_LOCK_RTSUM 3
-#define XFS_LOCK_INUMORDER 4
-
-#define XFS_IOLOCK_SHIFT 16
-#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
-
-#define XFS_MMAPLOCK_SHIFT 20
-
-#define XFS_ILOCK_SHIFT 24
-#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
-
-#define XFS_IOLOCK_DEP_MASK 0x000f0000
-#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
-#define XFS_ILOCK_DEP_MASK 0xff000000
-#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
+#define XFS_IOLOCK_SHIFT 16
+#define XFS_IOLOCK_PARENT_VAL 4
+#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1)
+#define XFS_IOLOCK_DEP_MASK 0x000f0000
+#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
+
+#define XFS_MMAPLOCK_SHIFT 20
+#define XFS_MMAPLOCK_NUMORDER 0
+#define XFS_MMAPLOCK_MAX_SUBCLASS 3
+#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
+
+#define XFS_ILOCK_SHIFT 24
+#define XFS_ILOCK_PARENT_VAL 5
+#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1)
+#define XFS_ILOCK_RTBITMAP_VAL 6
+#define XFS_ILOCK_RTSUM_VAL 7
+#define XFS_ILOCK_DEP_MASK 0xff000000
+#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
+
+#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \
XFS_MMAPLOCK_DEP_MASK | \
XFS_ILOCK_DEP_MASK)
diff --git a/kernel/fs/xfs/xfs_log_recover.c b/kernel/fs/xfs/xfs_log_recover.c
index 4f5784f85..a5d03396d 100644
--- a/kernel/fs/xfs/xfs_log_recover.c
+++ b/kernel/fs/xfs/xfs_log_recover.c
@@ -1887,9 +1887,14 @@ xlog_recover_get_buf_lsn(
uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
break;
case XFS_ATTR3_RMT_MAGIC:
- lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
- uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid;
- break;
+ /*
+ * Remote attr blocks are written synchronously, rather than
+ * being logged. That means they do not contain a valid LSN
+ * (i.e. transactionally ordered) in them, and hence any time we
+ * see a buffer to replay over the top of a remote attribute
+ * block we should simply do so.
+ */
+ goto recover_immediately;
case XFS_SB_MAGIC:
lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
uuid = &((struct xfs_dsb *)blk)->sb_uuid;
diff --git a/kernel/fs/xfs/xfs_symlink.c b/kernel/fs/xfs/xfs_symlink.c
index 3df411ead..40c076523 100644
--- a/kernel/fs/xfs/xfs_symlink.c
+++ b/kernel/fs/xfs/xfs_symlink.c
@@ -104,7 +104,7 @@ xfs_readlink_bmap(
cur_chunk += sizeof(struct xfs_dsymlink_hdr);
}
- memcpy(link + offset, bp->b_addr, byte_cnt);
+ memcpy(link + offset, cur_chunk, byte_cnt);
pathlen -= byte_cnt;
offset += byte_cnt;