summaryrefslogtreecommitdiffstats
path: root/kernel/fs
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fs')
-rw-r--r--kernel/fs/9p/vfs_inode.c3
-rw-r--r--kernel/fs/9p/vfs_inode_dotl.c3
-rw-r--r--kernel/fs/btrfs/inode-map.c17
-rw-r--r--kernel/fs/btrfs/ioctl.c22
-rw-r--r--kernel/fs/btrfs/transaction.c4
-rw-r--r--kernel/fs/btrfs/tree-log.c14
-rw-r--r--kernel/fs/dcache.c7
-rw-r--r--kernel/fs/ext4/extents.c6
-rw-r--r--kernel/fs/ext4/indirect.c2
-rw-r--r--kernel/fs/ext4/inode.c45
-rw-r--r--kernel/fs/ext4/mballoc.c16
-rw-r--r--kernel/fs/ext4/migrate.c17
-rw-r--r--kernel/fs/ext4/super.c4
-rw-r--r--kernel/fs/fuse/inode.c2
-rw-r--r--kernel/fs/hpfs/super.c18
-rw-r--r--kernel/fs/jbd2/checkpoint.c7
-rw-r--r--kernel/fs/jbd2/journal.c38
-rw-r--r--kernel/fs/namespace.c42
-rw-r--r--kernel/fs/nfs/flexfilelayout/flexfilelayout.c2
-rw-r--r--kernel/fs/nfs/flexfilelayout/flexfilelayoutdev.c7
-rw-r--r--kernel/fs/nfs/inode.c8
-rw-r--r--kernel/fs/nfs/nfs3xdr.c2
-rw-r--r--kernel/fs/nfs/nfs4proc.c3
-rw-r--r--kernel/fs/nfs/nfs4state.c2
-rw-r--r--kernel/fs/nfs/pagelist.c5
-rw-r--r--kernel/fs/nfs/pnfs.c3
-rw-r--r--kernel/fs/nfs/write.c1
-rw-r--r--kernel/fs/nfsd/nfs4state.c101
-rw-r--r--kernel/fs/nfsd/nfs4xdr.c11
-rw-r--r--kernel/fs/notify/mark.c30
-rw-r--r--kernel/fs/ocfs2/aops.c4
-rw-r--r--kernel/fs/ocfs2/dlmglue.c10
-rw-r--r--kernel/fs/overlayfs/readdir.c77
-rw-r--r--kernel/fs/pnode.h2
-rw-r--r--kernel/fs/signalfd.c5
-rw-r--r--kernel/fs/xfs/libxfs/xfs_attr_remote.c44
-rw-r--r--kernel/fs/xfs/xfs_attr_inactive.c10
-rw-r--r--kernel/fs/xfs/xfs_inode.c68
-rw-r--r--kernel/fs/xfs/xfs_inode.h85
-rw-r--r--kernel/fs/xfs/xfs_log_recover.c11
-rw-r--r--kernel/fs/xfs/xfs_symlink.c2
41 files changed, 522 insertions, 238 deletions
diff --git a/kernel/fs/9p/vfs_inode.c b/kernel/fs/9p/vfs_inode.c
index 703342e30..53f1e8a21 100644
--- a/kernel/fs/9p/vfs_inode.c
+++ b/kernel/fs/9p/vfs_inode.c
@@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
unlock_new_inode(inode);
return inode;
error:
- unlock_new_inode(inode);
- iput(inode);
+ iget_failed(inode);
return ERR_PTR(retval);
}
diff --git a/kernel/fs/9p/vfs_inode_dotl.c b/kernel/fs/9p/vfs_inode_dotl.c
index 9861c7c95..4d3ecfb55 100644
--- a/kernel/fs/9p/vfs_inode_dotl.c
+++ b/kernel/fs/9p/vfs_inode_dotl.c
@@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
unlock_new_inode(inode);
return inode;
error:
- unlock_new_inode(inode);
- iput(inode);
+ iget_failed(inode);
return ERR_PTR(retval);
}
diff --git a/kernel/fs/btrfs/inode-map.c b/kernel/fs/btrfs/inode-map.c
index f6a596d5a..d4a582ac3 100644
--- a/kernel/fs/btrfs/inode-map.c
+++ b/kernel/fs/btrfs/inode-map.c
@@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
+ spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
struct btrfs_free_space *info;
struct rb_node *n;
u64 count;
@@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
return;
while (1) {
+ bool add_to_ctl = true;
+
+ spin_lock(rbroot_lock);
n = rb_first(rbroot);
- if (!n)
+ if (!n) {
+ spin_unlock(rbroot_lock);
break;
+ }
info = rb_entry(n, struct btrfs_free_space, offset_index);
BUG_ON(info->bitmap); /* Logic error */
if (info->offset > root->ino_cache_progress)
- goto free;
+ add_to_ctl = false;
else if (info->offset + info->bytes > root->ino_cache_progress)
count = root->ino_cache_progress - info->offset + 1;
else
count = info->bytes;
- __btrfs_add_free_space(ctl, info->offset, count);
-free:
rb_erase(&info->offset_index, rbroot);
- kfree(info);
+ spin_unlock(rbroot_lock);
+ if (add_to_ctl)
+ __btrfs_add_free_space(ctl, info->offset, count);
+ kmem_cache_free(btrfs_free_space_cachep, info);
}
}
diff --git a/kernel/fs/btrfs/ioctl.c b/kernel/fs/btrfs/ioctl.c
index 1c22c6518..37d456a9a 100644
--- a/kernel/fs/btrfs/ioctl.c
+++ b/kernel/fs/btrfs/ioctl.c
@@ -2413,8 +2413,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto out_unlock_inode;
}
- d_invalidate(dentry);
-
down_write(&root->fs_info->subvol_sem);
err = may_destroy_subvol(dest);
@@ -2508,7 +2506,7 @@ out_up_write:
out_unlock_inode:
mutex_unlock(&inode->i_mutex);
if (!err) {
- shrink_dcache_sb(root->fs_info->sb);
+ d_invalidate(dentry);
btrfs_invalidate_inodes(dest);
d_delete(dentry);
ASSERT(dest->send_in_progress == 0);
@@ -2940,7 +2938,7 @@ out_unlock:
static long btrfs_ioctl_file_extent_same(struct file *file,
struct btrfs_ioctl_same_args __user *argp)
{
- struct btrfs_ioctl_same_args *same;
+ struct btrfs_ioctl_same_args *same = NULL;
struct btrfs_ioctl_same_extent_info *info;
struct inode *src = file_inode(file);
u64 off;
@@ -2970,6 +2968,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
if (IS_ERR(same)) {
ret = PTR_ERR(same);
+ same = NULL;
goto out;
}
@@ -3040,6 +3039,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
out:
mnt_drop_write_file(file);
+ kfree(same);
return ret;
}
@@ -3434,6 +3434,20 @@ process_slot:
u64 trim = 0;
u64 aligned_end = 0;
+ /*
+ * Don't copy an inline extent into an offset
+ * greater than zero. Having an inline extent
+ * at such an offset results in chaos as btrfs
+ * isn't prepared for such cases. Just skip
+ * this case for the same reasons as commented
+ * at btrfs_ioctl_clone().
+ */
+ if (last_dest_end > 0) {
+ ret = -EOPNOTSUPP;
+ btrfs_end_transaction(trans, root);
+ goto out;
+ }
+
if (off > key.offset) {
skip = off - key.offset;
new_key.offset += skip;
diff --git a/kernel/fs/btrfs/transaction.c b/kernel/fs/btrfs/transaction.c
index 5628e2525..94e909c5a 100644
--- a/kernel/fs/btrfs/transaction.c
+++ b/kernel/fs/btrfs/transaction.c
@@ -758,7 +758,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (!list_empty(&trans->ordered)) {
spin_lock(&info->trans_lock);
- list_splice(&trans->ordered, &cur_trans->pending_ordered);
+ list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
spin_unlock(&info->trans_lock);
}
@@ -1848,7 +1848,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
}
spin_lock(&root->fs_info->trans_lock);
- list_splice(&trans->ordered, &cur_trans->pending_ordered);
+ list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
spin_unlock(&root->fs_info->trans_lock);
atomic_inc(&cur_trans->use_count);
diff --git a/kernel/fs/btrfs/tree-log.c b/kernel/fs/btrfs/tree-log.c
index d04968374..4920fceff 100644
--- a/kernel/fs/btrfs/tree-log.c
+++ b/kernel/fs/btrfs/tree-log.c
@@ -4161,6 +4161,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
u64 ino = btrfs_ino(inode);
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
u64 logged_isize = 0;
+ bool need_log_inode_item = true;
path = btrfs_alloc_path();
if (!path)
@@ -4269,11 +4270,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
} else {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
- ret = log_inode_item(trans, log, dst_path, inode);
- if (ret) {
- err = ret;
- goto out_unlock;
- }
goto log_extents;
}
@@ -4296,6 +4292,9 @@ again:
if (min_key.type > max_key.type)
break;
+ if (min_key.type == BTRFS_INODE_ITEM_KEY)
+ need_log_inode_item = false;
+
src = path->nodes[0];
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
ins_nr++;
@@ -4366,6 +4365,11 @@ next_slot:
log_extents:
btrfs_release_path(path);
btrfs_release_path(dst_path);
+ if (need_log_inode_item) {
+ err = log_inode_item(trans, log, dst_path, inode);
+ if (err)
+ goto out_unlock;
+ }
if (fast_search) {
/*
* Some ordered extents started by fsync might have completed
diff --git a/kernel/fs/dcache.c b/kernel/fs/dcache.c
index e1d20e11e..c1dad9243 100644
--- a/kernel/fs/dcache.c
+++ b/kernel/fs/dcache.c
@@ -643,7 +643,7 @@ static inline bool fast_dput(struct dentry *dentry)
/*
* If we have a d_op->d_delete() operation, we sould not
- * let the dentry count go to zero, so use "put__or_lock".
+ * let the dentry count go to zero, so use "put_or_lock".
*/
if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
return lockref_put_or_lock(&dentry->d_lockref);
@@ -698,7 +698,7 @@ static inline bool fast_dput(struct dentry *dentry)
*/
smp_rmb();
d_flags = ACCESS_ONCE(dentry->d_flags);
- d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST;
+ d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;
/* Nothing to do? Dropping the reference was all we needed? */
if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
@@ -777,6 +777,9 @@ repeat:
if (unlikely(d_unhashed(dentry)))
goto kill_it;
+ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+ goto kill_it;
+
if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
if (dentry->d_op->d_delete(dentry))
goto kill_it;
diff --git a/kernel/fs/ext4/extents.c b/kernel/fs/ext4/extents.c
index e003a1e81..87ba10d1d 100644
--- a/kernel/fs/ext4/extents.c
+++ b/kernel/fs/ext4/extents.c
@@ -503,7 +503,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
struct buffer_head *bh;
int err;
- bh = sb_getblk(inode->i_sb, pblk);
+ bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS);
if (unlikely(!bh))
return ERR_PTR(-ENOMEM);
@@ -1088,7 +1088,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
err = -EIO;
goto cleanup;
}
- bh = sb_getblk(inode->i_sb, newblock);
+ bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
if (unlikely(!bh)) {
err = -ENOMEM;
goto cleanup;
@@ -1282,7 +1282,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
if (newblock == 0)
return err;
- bh = sb_getblk(inode->i_sb, newblock);
+ bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
if (unlikely(!bh))
return -ENOMEM;
lock_buffer(bh);
diff --git a/kernel/fs/ext4/indirect.c b/kernel/fs/ext4/indirect.c
index 958824019..94ae6874c 100644
--- a/kernel/fs/ext4/indirect.c
+++ b/kernel/fs/ext4/indirect.c
@@ -565,7 +565,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
"non-extent mapped inodes with bigalloc");
- return -ENOSPC;
+ return -EUCLEAN;
}
/* Set up for the direct block allocation */
diff --git a/kernel/fs/ext4/inode.c b/kernel/fs/ext4/inode.c
index 0554b0b59..966c61482 100644
--- a/kernel/fs/ext4/inode.c
+++ b/kernel/fs/ext4/inode.c
@@ -1342,7 +1342,7 @@ static void ext4_da_page_release_reservation(struct page *page,
unsigned int offset,
unsigned int length)
{
- int to_release = 0;
+ int to_release = 0, contiguous_blks = 0;
struct buffer_head *head, *bh;
unsigned int curr_off = 0;
struct inode *inode = page->mapping->host;
@@ -1363,14 +1363,23 @@ static void ext4_da_page_release_reservation(struct page *page,
if ((offset <= curr_off) && (buffer_delay(bh))) {
to_release++;
+ contiguous_blks++;
clear_buffer_delay(bh);
+ } else if (contiguous_blks) {
+ lblk = page->index <<
+ (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ lblk += (curr_off >> inode->i_blkbits) -
+ contiguous_blks;
+ ext4_es_remove_extent(inode, lblk, contiguous_blks);
+ contiguous_blks = 0;
}
curr_off = next_off;
} while ((bh = bh->b_this_page) != head);
- if (to_release) {
+ if (contiguous_blks) {
lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- ext4_es_remove_extent(inode, lblk, to_release);
+ lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
+ ext4_es_remove_extent(inode, lblk, contiguous_blks);
}
/* If we have released all the blocks belonging to a cluster, then we
@@ -1701,19 +1710,32 @@ static int __ext4_journalled_writepage(struct page *page,
ext4_walk_page_buffers(handle, page_bufs, 0, len,
NULL, bget_one);
}
- /* As soon as we unlock the page, it can go away, but we have
- * references to buffers so we are safe */
+ /*
+ * We need to release the page lock before we start the
+ * journal, so grab a reference so the page won't disappear
+ * out from under us.
+ */
+ get_page(page);
unlock_page(page);
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
ext4_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto out;
+ put_page(page);
+ goto out_no_pagelock;
}
-
BUG_ON(!ext4_handle_valid(handle));
+ lock_page(page);
+ put_page(page);
+ if (page->mapping != mapping) {
+ /* The page got truncated from under us */
+ ext4_journal_stop(handle);
+ ret = 0;
+ goto out;
+ }
+
if (inline_data) {
BUFFER_TRACE(inode_bh, "get write access");
ret = ext4_journal_get_write_access(handle, inode_bh);
@@ -1739,6 +1761,8 @@ static int __ext4_journalled_writepage(struct page *page,
NULL, bput_one);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out:
+ unlock_page(page);
+out_no_pagelock:
brelse(inode_bh);
return ret;
}
@@ -4345,7 +4369,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
int inode_size = EXT4_INODE_SIZE(sb);
oi.orig_ino = orig_ino;
- ino = (orig_ino & ~(inodes_per_block - 1)) + 1;
+ /*
+ * Calculate the first inode in the inode table block. Inode
+ * numbers are one-based. That is, the first inode in a block
+ * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1).
+ */
+ ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
if (ino == orig_ino)
continue;
diff --git a/kernel/fs/ext4/mballoc.c b/kernel/fs/ext4/mballoc.c
index 8d1e60214..41260489d 100644
--- a/kernel/fs/ext4/mballoc.c
+++ b/kernel/fs/ext4/mballoc.c
@@ -4800,18 +4800,12 @@ do_more:
/*
* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed
+ *
+ * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
+ * to fail.
*/
- retry:
- new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
- if (!new_entry) {
- /*
- * We use a retry loop because
- * ext4_free_blocks() is not allowed to fail.
- */
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- goto retry;
- }
+ new_entry = kmem_cache_alloc(ext4_free_data_cachep,
+ GFP_NOFS|__GFP_NOFAIL);
new_entry->efd_start_cluster = bit;
new_entry->efd_group = block_group;
new_entry->efd_count = count_clusters;
diff --git a/kernel/fs/ext4/migrate.c b/kernel/fs/ext4/migrate.c
index b52374e42..6163ad21c 100644
--- a/kernel/fs/ext4/migrate.c
+++ b/kernel/fs/ext4/migrate.c
@@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode)
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_extent *ex;
unsigned int i, len;
+ ext4_lblk_t start, end;
ext4_fsblk_t blk;
handle_t *handle;
int ret;
@@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode)
EXT4_FEATURE_RO_COMPAT_BIGALLOC))
return -EOPNOTSUPP;
+ /*
+ * In order to get correct extent info, force all delayed allocation
+ * blocks to be allocated, otherwise delayed allocation blocks may not
+ * be reflected and bypass the checks on extent header.
+ */
+ if (test_opt(inode->i_sb, DELALLOC))
+ ext4_alloc_da_blocks(inode);
+
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -650,11 +659,13 @@ int ext4_ind_migrate(struct inode *inode)
goto errout;
}
if (eh->eh_entries == 0)
- blk = len = 0;
+ blk = len = start = end = 0;
else {
len = le16_to_cpu(ex->ee_len);
blk = ext4_ext_pblock(ex);
- if (len > EXT4_NDIR_BLOCKS) {
+ start = le32_to_cpu(ex->ee_block);
+ end = start + len - 1;
+ if (end >= EXT4_NDIR_BLOCKS) {
ret = -EOPNOTSUPP;
goto errout;
}
@@ -662,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode)
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
memset(ei->i_data, 0, sizeof(ei->i_data));
- for (i=0; i < len; i++)
+ for (i = start; i <= end; i++)
ei->i_data[i] = cpu_to_le32(blk++);
ext4_mark_inode_dirty(handle, inode);
errout:
diff --git a/kernel/fs/ext4/super.c b/kernel/fs/ext4/super.c
index ca9d4a2fe..ca12affdb 100644
--- a/kernel/fs/ext4/super.c
+++ b/kernel/fs/ext4/super.c
@@ -807,6 +807,7 @@ static void ext4_put_super(struct super_block *sb)
dump_orphan_list(sb, sbi);
J_ASSERT(list_empty(&sbi->s_orphan));
+ sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
/*
@@ -4943,6 +4944,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
+ if (*flags & MS_LAZYTIME)
+ sb->s_flags |= MS_LAZYTIME;
+
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
diff --git a/kernel/fs/fuse/inode.c b/kernel/fs/fuse/inode.c
index 18dacf9ed..708d69711 100644
--- a/kernel/fs/fuse/inode.c
+++ b/kernel/fs/fuse/inode.c
@@ -1026,6 +1026,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
goto err_fput;
fuse_conn_init(fc);
+ fc->release = fuse_free_conn;
fc->dev = sb->s_dev;
fc->sb = sb;
@@ -1040,7 +1041,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fc->dont_mask = 1;
sb->s_flags |= MS_POSIXACL;
- fc->release = fuse_free_conn;
fc->flags = d.flags;
fc->user_id = d.user_id;
fc->group_id = d.group_id;
diff --git a/kernel/fs/hpfs/super.c b/kernel/fs/hpfs/super.c
index 7cd00d3a7..8685c6557 100644
--- a/kernel/fs/hpfs/super.c
+++ b/kernel/fs/hpfs/super.c
@@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s)
}
/* Filesystem error... */
-static char err_buf[1024];
-
void hpfs_error(struct super_block *s, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- vsnprintf(err_buf, sizeof(err_buf), fmt, args);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_err("filesystem error: %pV", &vaf);
+
va_end(args);
- pr_err("filesystem error: %s", err_buf);
if (!hpfs_sb(s)->sb_was_error) {
if (hpfs_sb(s)->sb_err == 2) {
pr_cont("; crashing the system because you wanted it\n");
@@ -424,11 +427,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
int o;
struct hpfs_sb_info *sbi = hpfs_sb(s);
char *new_opts = kstrdup(data, GFP_KERNEL);
-
+
+ if (!new_opts)
+ return -ENOMEM;
+
sync_filesystem(s);
*flags |= MS_NOATIME;
-
+
hpfs_lock(s);
uid = sbi->sb_uid; gid = sbi->sb_gid;
umask = 0777 & ~sbi->sb_mode;
diff --git a/kernel/fs/jbd2/checkpoint.c b/kernel/fs/jbd2/checkpoint.c
index 6f691c289..9c00e2e5a 100644
--- a/kernel/fs/jbd2/checkpoint.c
+++ b/kernel/fs/jbd2/checkpoint.c
@@ -392,7 +392,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
unsigned long blocknr;
if (is_journal_aborted(journal))
- return 1;
+ return -EIO;
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
return 1;
@@ -407,10 +407,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* jbd2_cleanup_journal_tail() doesn't get called all that often.
*/
if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
- __jbd2_update_log_tail(journal, first_tid, blocknr);
- return 0;
+ return __jbd2_update_log_tail(journal, first_tid, blocknr);
}
diff --git a/kernel/fs/jbd2/journal.c b/kernel/fs/jbd2/journal.c
index b96bd8076..112fad9e1 100644
--- a/kernel/fs/jbd2/journal.c
+++ b/kernel/fs/jbd2/journal.c
@@ -885,9 +885,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
*
* Requires j_checkpoint_mutex
*/
-void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
{
unsigned long freed;
+ int ret;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
@@ -897,7 +898,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
* space and if we lose sb update during power failure we'd replay
* old transaction with possibly newly overwritten data.
*/
- jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+ ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+ if (ret)
+ goto out;
+
write_lock(&journal->j_state_lock);
freed = block - journal->j_tail;
if (block < journal->j_tail)
@@ -913,6 +917,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
journal->j_tail_sequence = tid;
journal->j_tail = block;
write_unlock(&journal->j_state_lock);
+
+out:
+ return ret;
}
/*
@@ -1331,7 +1338,7 @@ static int journal_reset(journal_t *journal)
return jbd2_journal_start_thread(journal);
}
-static void jbd2_write_superblock(journal_t *journal, int write_op)
+static int jbd2_write_superblock(journal_t *journal, int write_op)
{
struct buffer_head *bh = journal->j_sb_buffer;
journal_superblock_t *sb = journal->j_superblock;
@@ -1370,7 +1377,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
printk(KERN_ERR "JBD2: Error %d detected when updating "
"journal superblock for %s.\n", ret,
journal->j_devname);
+ jbd2_journal_abort(journal, ret);
}
+
+ return ret;
}
/**
@@ -1383,10 +1393,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
* Update a journal's superblock information about log tail and write it to
* disk, waiting for the IO to complete.
*/
-void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
unsigned long tail_block, int write_op)
{
journal_superblock_t *sb = journal->j_superblock;
+ int ret;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
@@ -1395,13 +1406,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
sb->s_sequence = cpu_to_be32(tail_tid);
sb->s_start = cpu_to_be32(tail_block);
- jbd2_write_superblock(journal, write_op);
+ ret = jbd2_write_superblock(journal, write_op);
+ if (ret)
+ goto out;
/* Log is no longer empty */
write_lock(&journal->j_state_lock);
WARN_ON(!sb->s_sequence);
journal->j_flags &= ~JBD2_FLUSHED;
write_unlock(&journal->j_state_lock);
+
+out:
+ return ret;
}
/**
@@ -1950,7 +1966,14 @@ int jbd2_journal_flush(journal_t *journal)
return -EIO;
mutex_lock(&journal->j_checkpoint_mutex);
- jbd2_cleanup_journal_tail(journal);
+ if (!err) {
+ err = jbd2_cleanup_journal_tail(journal);
+ if (err < 0) {
+ mutex_unlock(&journal->j_checkpoint_mutex);
+ goto out;
+ }
+ err = 0;
+ }
/* Finally, mark the journal as really needing no recovery.
* This sets s_start==0 in the underlying superblock, which is
@@ -1966,7 +1989,8 @@ int jbd2_journal_flush(journal_t *journal)
J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
write_unlock(&journal->j_state_lock);
- return 0;
+out:
+ return err;
}
/**
diff --git a/kernel/fs/namespace.c b/kernel/fs/namespace.c
index c246b29e4..28937028f 100644
--- a/kernel/fs/namespace.c
+++ b/kernel/fs/namespace.c
@@ -1354,6 +1354,36 @@ enum umount_tree_flags {
UMOUNT_PROPAGATE = 2,
UMOUNT_CONNECTED = 4,
};
+
+static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
+{
+ /* Leaving mounts connected is only valid for lazy umounts */
+ if (how & UMOUNT_SYNC)
+ return true;
+
+ /* A mount without a parent has nothing to be connected to */
+ if (!mnt_has_parent(mnt))
+ return true;
+
+ /* Because the reference counting rules change when mounts are
+ * unmounted and connected, umounted mounts may not be
+ * connected to mounted mounts.
+ */
+ if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
+ return true;
+
+ /* Has it been requested that the mount remain connected? */
+ if (how & UMOUNT_CONNECTED)
+ return false;
+
+ /* Is the mount locked such that it needs to remain connected? */
+ if (IS_MNT_LOCKED(mnt))
+ return false;
+
+ /* By default disconnect the mount */
+ return true;
+}
+
/*
* mount_lock must be held
* namespace_sem must be held for write
@@ -1391,10 +1421,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
if (how & UMOUNT_SYNC)
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
- disconnect = !(((how & UMOUNT_CONNECTED) &&
- mnt_has_parent(p) &&
- (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) ||
- IS_MNT_LOCKED_AND_LAZY(p));
+ disconnect = disconnect_mount(p, how);
pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
disconnect ? &unmounted : NULL);
@@ -1531,11 +1558,8 @@ void __detach_mounts(struct dentry *dentry)
while (!hlist_empty(&mp->m_list)) {
mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
- struct mount *p, *tmp;
- list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
- hlist_add_head(&p->mnt_umount.s_list, &unmounted);
- umount_mnt(p);
- }
+ hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
+ umount_mnt(mnt);
}
else umount_tree(mnt, UMOUNT_CONNECTED);
}
diff --git a/kernel/fs/nfs/flexfilelayout/flexfilelayout.c b/kernel/fs/nfs/flexfilelayout/flexfilelayout.c
index 7d05089e5..6f5f0f425 100644
--- a/kernel/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/kernel/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -631,7 +631,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
nfs_direct_set_resched_writes(hdr->dreq);
/* fake unstable write to let common nfs resend pages */
hdr->verf.committed = NFS_UNSTABLE;
- hdr->good_bytes = 0;
+ hdr->good_bytes = hdr->args.count;
}
return;
}
diff --git a/kernel/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/kernel/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 77a2d026a..f13e1969e 100644
--- a/kernel/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/kernel/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
__func__, PTR_ERR(cred));
return PTR_ERR(cred);
} else {
- mirror->cred = cred;
+ if (cmpxchg(&mirror->cred, NULL, cred))
+ put_rpccred(cred);
}
}
return 0;
@@ -386,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
smp_rmb();
if (ds->ds_clp)
- goto out;
+ goto out_update_creds;
flavor = nfs4_ff_layout_choose_authflavor(mirror);
@@ -430,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
}
}
}
-
+out_update_creds:
if (ff_layout_update_mirror_cred(mirror, ds))
ds = NULL;
out:
diff --git a/kernel/fs/nfs/inode.c b/kernel/fs/nfs/inode.c
index f734562c6..5d25b9d97 100644
--- a/kernel/fs/nfs/inode.c
+++ b/kernel/fs/nfs/inode.c
@@ -1242,9 +1242,11 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
cur_size = i_size_read(inode);
new_isize = nfs_size_to_loff_t(fattr->size);
- if (cur_size != new_isize && nfsi->nrequests == 0)
+ if (cur_size != new_isize)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
}
+ if (nfsi->nrequests != 0)
+ invalid &= ~NFS_INO_REVAL_PAGECACHE;
/* Have any file permissions changed? */
if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
@@ -1682,8 +1684,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_DATA
| NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_REVAL_PAGECACHE;
+ | NFS_INO_INVALID_ACL;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
inode->i_version = fattr->change_attr;
@@ -1715,7 +1716,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if ((nfsi->nrequests == 0) || new_isize > cur_isize) {
i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
- invalid &= ~NFS_INO_REVAL_PAGECACHE;
}
dprintk("NFS: isize change on server for file %s/%ld "
"(%Ld to %Ld)\n",
diff --git a/kernel/fs/nfs/nfs3xdr.c b/kernel/fs/nfs/nfs3xdr.c
index 53852a4bd..9b04c2e6f 100644
--- a/kernel/fs/nfs/nfs3xdr.c
+++ b/kernel/fs/nfs/nfs3xdr.c
@@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
if (args->npages != 0)
xdr_write_pages(xdr, args->pages, 0, args->len);
else
- xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE);
+ xdr_reserve_space(xdr, args->len);
error = nfsacl_encode(xdr->buf, base, args->inode,
(args->mask & NFS_ACL) ?
diff --git a/kernel/fs/nfs/nfs4proc.c b/kernel/fs/nfs/nfs4proc.c
index 55e1e3af2..d3f205126 100644
--- a/kernel/fs/nfs/nfs4proc.c
+++ b/kernel/fs/nfs/nfs4proc.c
@@ -1204,12 +1204,15 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state,
static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
{
+ if (!(state->n_wronly || state->n_rdonly || state->n_rdwr))
+ return;
if (state->n_wronly)
set_bit(NFS_O_WRONLY_STATE, &state->flags);
if (state->n_rdonly)
set_bit(NFS_O_RDONLY_STATE, &state->flags);
if (state->n_rdwr)
set_bit(NFS_O_RDWR_STATE, &state->flags);
+ set_bit(NFS_OPEN_STATE, &state->flags);
}
static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
diff --git a/kernel/fs/nfs/nfs4state.c b/kernel/fs/nfs/nfs4state.c
index 2782cfca2..ddef1dc80 100644
--- a/kernel/fs/nfs/nfs4state.c
+++ b/kernel/fs/nfs/nfs4state.c
@@ -1482,6 +1482,8 @@ restart:
spin_unlock(&state->state_lock);
}
nfs4_put_open_state(state);
+ clear_bit(NFS4CLNT_RECLAIM_NOGRACE,
+ &state->flags);
spin_lock(&sp->so_lock);
goto restart;
}
diff --git a/kernel/fs/nfs/pagelist.c b/kernel/fs/nfs/pagelist.c
index 282b39369..7b4552678 100644
--- a/kernel/fs/nfs/pagelist.c
+++ b/kernel/fs/nfs/pagelist.c
@@ -1110,8 +1110,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
nfs_list_remove_request(req);
if (__nfs_pageio_add_request(desc, req))
continue;
- if (desc->pg_error < 0)
+ if (desc->pg_error < 0) {
+ list_splice_tail(&head, &mirror->pg_list);
+ mirror->pg_recoalesce = 1;
return 0;
+ }
break;
}
} while (mirror->pg_recoalesce);
diff --git a/kernel/fs/nfs/pnfs.c b/kernel/fs/nfs/pnfs.c
index 230606243..d47c18868 100644
--- a/kernel/fs/nfs/pnfs.c
+++ b/kernel/fs/nfs/pnfs.c
@@ -1821,6 +1821,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
/* Resend all requests through the MDS */
nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
hdr->completion_ops);
+ set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
@@ -1865,6 +1866,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
mirror->pg_recoalesce = 1;
}
nfs_pgio_data_destroy(hdr);
+ hdr->release(hdr);
}
static enum pnfs_try_status
@@ -1979,6 +1981,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
mirror->pg_recoalesce = 1;
}
nfs_pgio_data_destroy(hdr);
+ hdr->release(hdr);
}
/*
diff --git a/kernel/fs/nfs/write.c b/kernel/fs/nfs/write.c
index dfc19f157..daf355642 100644
--- a/kernel/fs/nfs/write.c
+++ b/kernel/fs/nfs/write.c
@@ -1289,6 +1289,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
static void nfs_redirty_request(struct nfs_page *req)
{
nfs_mark_request_dirty(req);
+ set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
nfs_unlock_request(req);
nfs_end_page_writeback(req);
nfs_release_request(req);
diff --git a/kernel/fs/nfsd/nfs4state.c b/kernel/fs/nfsd/nfs4state.c
index 039f9c8a9..6e13504f7 100644
--- a/kernel/fs/nfsd/nfs4state.c
+++ b/kernel/fs/nfsd/nfs4state.c
@@ -4397,9 +4397,9 @@ laundromat_main(struct work_struct *laundry)
queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
}
-static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
+static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
- if (!fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle))
+ if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
return nfserr_bad_stateid;
return nfs_ok;
}
@@ -4574,20 +4574,48 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
return nfs_ok;
}
+static struct file *
+nfs4_find_file(struct nfs4_stid *s, int flags)
+{
+ switch (s->sc_type) {
+ case NFS4_DELEG_STID:
+ if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
+ return NULL;
+ return get_file(s->sc_file->fi_deleg_file);
+ case NFS4_OPEN_STID:
+ case NFS4_LOCK_STID:
+ if (flags & RD_STATE)
+ return find_readable_file(s->sc_file);
+ else
+ return find_writeable_file(s->sc_file);
+ break;
+ }
+
+ return NULL;
+}
+
+static __be32
+nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
+{
+ __be32 status;
+
+ status = nfsd4_check_openowner_confirmed(ols);
+ if (status)
+ return status;
+ return nfs4_check_openmode(ols, flags);
+}
+
/*
-* Checks for stateid operations
-*/
+ * Checks for stateid operations
+ */
__be32
nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
stateid_t *stateid, int flags, struct file **filpp)
{
- struct nfs4_stid *s;
- struct nfs4_ol_stateid *stp = NULL;
- struct nfs4_delegation *dp = NULL;
- struct svc_fh *current_fh = &cstate->current_fh;
- struct inode *ino = d_inode(current_fh->fh_dentry);
+ struct svc_fh *fhp = &cstate->current_fh;
+ struct inode *ino = d_inode(fhp->fh_dentry);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- struct file *file = NULL;
+ struct nfs4_stid *s;
__be32 status;
if (filpp)
@@ -4597,60 +4625,39 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
return nfserr_grace;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
- return check_special_stateids(net, current_fh, stateid, flags);
+ return check_special_stateids(net, fhp, stateid, flags);
status = nfsd4_lookup_stateid(cstate, stateid,
NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
&s, nn);
if (status)
return status;
- status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
+ status = check_stateid_generation(stateid, &s->sc_stateid,
+ nfsd4_has_session(cstate));
if (status)
goto out;
+
switch (s->sc_type) {
case NFS4_DELEG_STID:
- dp = delegstateid(s);
- status = nfs4_check_delegmode(dp, flags);
- if (status)
- goto out;
- if (filpp) {
- file = dp->dl_stid.sc_file->fi_deleg_file;
- if (!file) {
- WARN_ON_ONCE(1);
- status = nfserr_serverfault;
- goto out;
- }
- get_file(file);
- }
+ status = nfs4_check_delegmode(delegstateid(s), flags);
break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
- stp = openlockstateid(s);
- status = nfs4_check_fh(current_fh, stp);
- if (status)
- goto out;
- status = nfsd4_check_openowner_confirmed(stp);
- if (status)
- goto out;
- status = nfs4_check_openmode(stp, flags);
- if (status)
- goto out;
- if (filpp) {
- struct nfs4_file *fp = stp->st_stid.sc_file;
-
- if (flags & RD_STATE)
- file = find_readable_file(fp);
- else
- file = find_writeable_file(fp);
- }
+ status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
break;
default:
status = nfserr_bad_stateid;
+ break;
+ }
+ if (status)
goto out;
+ status = nfs4_check_fh(fhp, s);
+
+ if (!status && filpp) {
+ *filpp = nfs4_find_file(s, flags);
+ if (!*filpp)
+ status = nfserr_serverfault;
}
- status = nfs_ok;
- if (file)
- *filpp = file;
out:
nfs4_put_stid(s);
return status;
@@ -4754,7 +4761,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
if (status)
return status;
- return nfs4_check_fh(current_fh, stp);
+ return nfs4_check_fh(current_fh, &stp->st_stid);
}
/*
diff --git a/kernel/fs/nfsd/nfs4xdr.c b/kernel/fs/nfsd/nfs4xdr.c
index 158badf94..d4d84451e 100644
--- a/kernel/fs/nfsd/nfs4xdr.c
+++ b/kernel/fs/nfsd/nfs4xdr.c
@@ -2142,6 +2142,7 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp,
#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
FATTR4_WORD0_RDATTR_ERROR)
#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
+#define WORD2_ABSENT_FS_ATTRS 0
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
static inline __be32
@@ -2170,7 +2171,7 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
{ return 0; }
#endif
-static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
+static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32 *rdattr_err)
{
/* As per referral draft: */
if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS ||
@@ -2183,6 +2184,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
}
*bmval0 &= WORD0_ABSENT_FS_ATTRS;
*bmval1 &= WORD1_ABSENT_FS_ATTRS;
+ *bmval2 &= WORD2_ABSENT_FS_ATTRS;
return 0;
}
@@ -2246,8 +2248,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion));
if (exp->ex_fslocs.migrated) {
- BUG_ON(bmval[2]);
- status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err);
+ status = fattr_handle_absent_fs(&bmval0, &bmval1, &bmval2, &rdattr_err);
if (status)
goto out;
}
@@ -2290,8 +2291,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
}
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) ||
- bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
+ bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
err = security_inode_getsecctx(d_inode(dentry),
&context, &contextlen);
contextsupport = (err == 0);
diff --git a/kernel/fs/notify/mark.c b/kernel/fs/notify/mark.c
index 92e48c70f..39ddcaf09 100644
--- a/kernel/fs/notify/mark.c
+++ b/kernel/fs/notify/mark.c
@@ -412,16 +412,36 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
unsigned int flags)
{
struct fsnotify_mark *lmark, *mark;
+ LIST_HEAD(to_free);
+ /*
+ * We have to be really careful here. Anytime we drop mark_mutex, e.g.
+ * fsnotify_clear_marks_by_inode() can come and free marks. Even in our
+ * to_free list so we have to use mark_mutex even when accessing that
+ * list. And freeing mark requires us to drop mark_mutex. So we can
+ * reliably free only the first mark in the list. That's why we first
+ * move marks to free to to_free list in one go and then free marks in
+ * to_free list one by one.
+ */
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
- if (mark->flags & flags) {
- fsnotify_get_mark(mark);
- fsnotify_destroy_mark_locked(mark, group);
- fsnotify_put_mark(mark);
- }
+ if (mark->flags & flags)
+ list_move(&mark->g_list, &to_free);
}
mutex_unlock(&group->mark_mutex);
+
+ while (1) {
+ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
+ if (list_empty(&to_free)) {
+ mutex_unlock(&group->mark_mutex);
+ break;
+ }
+ mark = list_first_entry(&to_free, struct fsnotify_mark, g_list);
+ fsnotify_get_mark(mark);
+ fsnotify_destroy_mark_locked(mark, group);
+ mutex_unlock(&group->mark_mutex);
+ fsnotify_put_mark(mark);
+ }
}
/*
diff --git a/kernel/fs/ocfs2/aops.c b/kernel/fs/ocfs2/aops.c
index f906a250d..9ea701270 100644
--- a/kernel/fs/ocfs2/aops.c
+++ b/kernel/fs/ocfs2/aops.c
@@ -686,7 +686,7 @@ static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
u64 s = i_size_read(inode);
- sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
+ sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) +
(do_div(s, osb->s_clustersize) >> 9);
ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
@@ -911,7 +911,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
ret = blkdev_issue_zeroout(osb->sb->s_bdev,
- p_cpos << (osb->s_clustersize_bits - 9),
+ (u64)p_cpos << (osb->s_clustersize_bits - 9),
zero_len_head >> 9, GFP_NOFS, false);
if (ret < 0)
mlog_errno(ret);
diff --git a/kernel/fs/ocfs2/dlmglue.c b/kernel/fs/ocfs2/dlmglue.c
index 8b23aa2f5..23157e40d 100644
--- a/kernel/fs/ocfs2/dlmglue.c
+++ b/kernel/fs/ocfs2/dlmglue.c
@@ -4025,9 +4025,13 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
osb->dc_work_sequence = osb->dc_wake_sequence;
processed = osb->blocked_lock_count;
- while (processed) {
- BUG_ON(list_empty(&osb->blocked_lock_list));
-
+ /*
+ * blocked lock processing in this loop might call iput which can
+ * remove items off osb->blocked_lock_list. Downconvert up to
+ * 'processed' number of locks, but stop short if we had some
+ * removed in ocfs2_mark_lockres_freeing when downconverting.
+ */
+ while (processed && !list_empty(&osb->blocked_lock_list)) {
lockres = list_entry(osb->blocked_lock_list.next,
struct ocfs2_lock_res, l_blocked_list);
list_del_init(&lockres->l_blocked_list);
diff --git a/kernel/fs/overlayfs/readdir.c b/kernel/fs/overlayfs/readdir.c
index 907870e81..70e9af551 100644
--- a/kernel/fs/overlayfs/readdir.c
+++ b/kernel/fs/overlayfs/readdir.c
@@ -23,6 +23,7 @@ struct ovl_cache_entry {
u64 ino;
struct list_head l_node;
struct rb_node node;
+ struct ovl_cache_entry *next_maybe_whiteout;
bool is_whiteout;
char name[];
};
@@ -39,7 +40,7 @@ struct ovl_readdir_data {
struct rb_root root;
struct list_head *list;
struct list_head middle;
- struct dentry *dir;
+ struct ovl_cache_entry *first_maybe_whiteout;
int count;
int err;
};
@@ -79,7 +80,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
return NULL;
}
-static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir,
+static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
const char *name, int len,
u64 ino, unsigned int d_type)
{
@@ -98,29 +99,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir,
p->is_whiteout = false;
if (d_type == DT_CHR) {
- struct dentry *dentry;
- const struct cred *old_cred;
- struct cred *override_cred;
-
- override_cred = prepare_creds();
- if (!override_cred) {
- kfree(p);
- return NULL;
- }
-
- /*
- * CAP_DAC_OVERRIDE for lookup
- */
- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
- old_cred = override_creds(override_cred);
-
- dentry = lookup_one_len(name, dir, len);
- if (!IS_ERR(dentry)) {
- p->is_whiteout = ovl_is_whiteout(dentry);
- dput(dentry);
- }
- revert_creds(old_cred);
- put_cred(override_cred);
+ p->next_maybe_whiteout = rdd->first_maybe_whiteout;
+ rdd->first_maybe_whiteout = p;
}
return p;
}
@@ -148,7 +128,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
return 0;
}
- p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type);
+ p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
if (p == NULL)
return -ENOMEM;
@@ -169,7 +149,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd,
if (p) {
list_move_tail(&p->l_node, &rdd->middle);
} else {
- p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type);
+ p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
if (p == NULL)
rdd->err = -ENOMEM;
else
@@ -219,6 +199,43 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name,
return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
}
+static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
+{
+ int err;
+ struct ovl_cache_entry *p;
+ struct dentry *dentry;
+ const struct cred *old_cred;
+ struct cred *override_cred;
+
+ override_cred = prepare_creds();
+ if (!override_cred)
+ return -ENOMEM;
+
+ /*
+ * CAP_DAC_OVERRIDE for lookup
+ */
+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+ old_cred = override_creds(override_cred);
+
+ err = mutex_lock_killable(&dir->d_inode->i_mutex);
+ if (!err) {
+ while (rdd->first_maybe_whiteout) {
+ p = rdd->first_maybe_whiteout;
+ rdd->first_maybe_whiteout = p->next_maybe_whiteout;
+ dentry = lookup_one_len(p->name, dir, p->len);
+ if (!IS_ERR(dentry)) {
+ p->is_whiteout = ovl_is_whiteout(dentry);
+ dput(dentry);
+ }
+ }
+ mutex_unlock(&dir->d_inode->i_mutex);
+ }
+ revert_creds(old_cred);
+ put_cred(override_cred);
+
+ return err;
+}
+
static inline int ovl_dir_read(struct path *realpath,
struct ovl_readdir_data *rdd)
{
@@ -229,7 +246,7 @@ static inline int ovl_dir_read(struct path *realpath,
if (IS_ERR(realfile))
return PTR_ERR(realfile);
- rdd->dir = realpath->dentry;
+ rdd->first_maybe_whiteout = NULL;
rdd->ctx.pos = 0;
do {
rdd->count = 0;
@@ -238,6 +255,10 @@ static inline int ovl_dir_read(struct path *realpath,
if (err >= 0)
err = rdd->err;
} while (!err && rdd->count);
+
+ if (!err && rdd->first_maybe_whiteout)
+ err = ovl_check_whiteouts(realpath->dentry, rdd);
+
fput(realfile);
return err;
diff --git a/kernel/fs/pnode.h b/kernel/fs/pnode.h
index 7114ce6e6..0fcdbe7ca 100644
--- a/kernel/fs/pnode.h
+++ b/kernel/fs/pnode.h
@@ -20,8 +20,6 @@
#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)
-#define IS_MNT_LOCKED_AND_LAZY(m) \
- (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED)
#define CL_EXPIRE 0x01
#define CL_SLAVE 0x02
diff --git a/kernel/fs/signalfd.c b/kernel/fs/signalfd.c
index 7e412ad74..270221fce 100644
--- a/kernel/fs/signalfd.c
+++ b/kernel/fs/signalfd.c
@@ -121,8 +121,9 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
* Other callers might not initialize the si_lsb field,
* so check explicitly for the right codes here.
*/
- if (kinfo->si_code == BUS_MCEERR_AR ||
- kinfo->si_code == BUS_MCEERR_AO)
+ if (kinfo->si_signo == SIGBUS &&
+ (kinfo->si_code == BUS_MCEERR_AR ||
+ kinfo->si_code == BUS_MCEERR_AO))
err |= __put_user((short) kinfo->si_addr_lsb,
&uinfo->ssi_addr_lsb);
#endif
diff --git a/kernel/fs/xfs/libxfs/xfs_attr_remote.c b/kernel/fs/xfs/libxfs/xfs_attr_remote.c
index 20de88d1b..dd714037c 100644
--- a/kernel/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/kernel/fs/xfs/libxfs/xfs_attr_remote.c
@@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ int blksize = mp->m_attr_geo->blksize;
char *ptr;
int len;
xfs_daddr_t bno;
- int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify(
ASSERT(len >= blksize);
while (len > 0) {
+ struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
+
if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
}
- if (bip) {
- struct xfs_attr3_rmt_hdr *rmt;
- rmt = (struct xfs_attr3_rmt_hdr *)ptr;
- rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ /*
+ * Ensure we aren't writing bogus LSNs to disk. See
+ * xfs_attr3_rmt_hdr_set() for the explanation.
+ */
+ if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
}
xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
@@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set(
rmt->rm_owner = cpu_to_be64(ino);
rmt->rm_blkno = cpu_to_be64(bno);
+ /*
+ * Remote attribute blocks are written synchronously, so we don't
+ * have an LSN that we can stamp in them that makes any sense to log
+ * recovery. To ensure that log recovery handles overwrites of these
+ * blocks sanely (i.e. once they've been freed and reallocated as some
+ * other type of metadata) we need to ensure that the LSN has a value
+ * that tells log recovery to ignore the LSN and overwrite the buffer
+ * with whatever is in it's log. To do this, we use the magic
+ * NULLCOMMITLSN to indicate that the LSN is invalid.
+ */
+ rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
+
return sizeof(struct xfs_attr3_rmt_hdr);
}
@@ -434,14 +451,21 @@ xfs_attr_rmtval_set(
/*
* Allocate a single extent, up to the size of the value.
+ *
+ * Note that we have to consider this a data allocation as we
+ * write the remote attribute without logging the contents.
+ * Hence we must ensure that we aren't using blocks that are on
+ * the busy list so that we don't overwrite blocks which have
+ * recently been freed but their transactions are not yet
+ * committed to disk. If we overwrite the contents of a busy
+ * extent and then crash then the block may not contain the
+ * correct metadata after log recovery occurs.
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
- blkcnt,
- XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
- args->firstblock, args->total, &map, &nmap,
- args->flist);
+ blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
+ args->total, &map, &nmap, args->flist);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
diff --git a/kernel/fs/xfs/xfs_attr_inactive.c b/kernel/fs/xfs/xfs_attr_inactive.c
index 3fbf167cf..73e75a87a 100644
--- a/kernel/fs/xfs/xfs_attr_inactive.c
+++ b/kernel/fs/xfs/xfs_attr_inactive.c
@@ -435,8 +435,14 @@ xfs_attr_inactive(
*/
xfs_trans_ijoin(trans, dp, 0);
- /* invalidate and truncate the attribute fork extents */
- if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+ /*
+ * Invalidate and truncate the attribute fork extents. Make sure the
+ * fork actually has attributes as otherwise the invalidation has no
+ * blocks to read and returns an error. In this case, just do the fork
+ * removal below.
+ */
+ if (xfs_inode_hasattr(dp) &&
+ dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
error = xfs_attr3_root_inactive(&trans, dp);
if (error)
goto out_cancel;
diff --git a/kernel/fs/xfs/xfs_inode.c b/kernel/fs/xfs/xfs_inode.c
index 539a85fdd..fec4bfba0 100644
--- a/kernel/fs/xfs/xfs_inode.c
+++ b/kernel/fs/xfs/xfs_inode.c
@@ -164,7 +164,7 @@ xfs_ilock(
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL)
mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
@@ -212,7 +212,7 @@ xfs_ilock_nowait(
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL) {
if (!mrtryupdate(&ip->i_iolock))
@@ -281,7 +281,7 @@ xfs_iunlock(
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
ASSERT(lock_flags != 0);
if (lock_flags & XFS_IOLOCK_EXCL)
@@ -364,30 +364,38 @@ int xfs_lock_delays;
/*
* Bump the subclass so xfs_lock_inodes() acquires each lock with a different
- * value. This shouldn't be called for page fault locking, but we also need to
- * ensure we don't overrun the number of lockdep subclasses for the iolock or
- * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
+ * value. This can be called for any type of inode lock combination, including
+ * parent locking. Care must be taken to ensure we don't overrun the subclass
+ * storage fields in the class mask we build.
*/
static inline int
xfs_lock_inumorder(int lock_mode, int subclass)
{
+ int class = 0;
+
+ ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
+ XFS_ILOCK_RTSUM)));
+
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
- ASSERT(subclass + XFS_LOCK_INUMORDER <
- (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+ ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
+ ASSERT(subclass + XFS_IOLOCK_PARENT_VAL <
+ MAX_LOCKDEP_SUBCLASSES);
+ class += subclass << XFS_IOLOCK_SHIFT;
+ if (lock_mode & XFS_IOLOCK_PARENT)
+ class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
}
if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
- ASSERT(subclass + XFS_LOCK_INUMORDER <
- (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
- lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
- XFS_MMAPLOCK_SHIFT;
+ ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
+ class += subclass << XFS_MMAPLOCK_SHIFT;
}
- if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+ if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
+ ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
+ class += subclass << XFS_ILOCK_SHIFT;
+ }
- return lock_mode;
+ return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
}
/*
@@ -399,6 +407,11 @@ xfs_lock_inumorder(int lock_mode, int subclass)
* transaction (such as truncate). This can result in deadlock since the long
* running trans might need to wait for the inode we just locked in order to
* push the tail and free space in the log.
+ *
+ * xfs_lock_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
*/
void
xfs_lock_inodes(
@@ -409,8 +422,29 @@ xfs_lock_inodes(
int attempts = 0, i, j, try_lock;
xfs_log_item_t *lp;
- /* currently supports between 2 and 5 inodes */
+ /*
+ * Currently supports between 2 and 5 inodes with exclusive locking. We
+ * support an arbitrary depth of locking here, but absolute limits on
+ * inodes depend on the the type of locking and the limits placed by
+ * lockdep annotations in xfs_lock_inumorder. These are all checked by
+ * the asserts.
+ */
ASSERT(ips && inodes >= 2 && inodes <= 5);
+ ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
+ XFS_ILOCK_EXCL));
+ ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
+ XFS_ILOCK_SHARED)));
+ ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
+ inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
+ ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
+ inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
+ ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
+ inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
+
+ if (lock_mode & XFS_IOLOCK_EXCL) {
+ ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
+ } else if (lock_mode & XFS_MMAPLOCK_EXCL)
+ ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
try_lock = 0;
i = 0;
diff --git a/kernel/fs/xfs/xfs_inode.h b/kernel/fs/xfs/xfs_inode.h
index 8f22d2036..ee26a603c 100644
--- a/kernel/fs/xfs/xfs_inode.h
+++ b/kernel/fs/xfs/xfs_inode.h
@@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
* Flags for lockdep annotations.
*
* XFS_LOCK_PARENT - for directory operations that require locking a
- * parent directory inode and a child entry inode. The parent gets locked
- * with this flag so it gets a lockdep subclass of 1 and the child entry
- * lock will have a lockdep subclass of 0.
+ * parent directory inode and a child entry inode. IOLOCK requires nesting,
+ * MMAPLOCK does not support this class, ILOCK requires a single subclass
+ * to differentiate parent from child.
*
* XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
* inodes do not participate in the normal lock order, and thus have their
@@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
* XFS_LOCK_INUMORDER - for locking several inodes at the some time
* with xfs_lock_inodes(). This flag is used as the starting subclass
* and each subsequent lock acquired will increment the subclass by one.
- * So the first lock acquired will have a lockdep subclass of 4, the
- * second lock will have a lockdep subclass of 5, and so on. It is
- * the responsibility of the class builder to shift this to the correct
- * portion of the lock_mode lockdep mask.
+ * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
+ * limited to the subclasses we can represent via nesting. We need at least
+ * 5 inodes nest depth for the ILOCK through rename, and we also have to support
+ * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
+ * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
+ * 8 subclasses supported by lockdep.
+ *
+ * This also means we have to number the sub-classes in the lowest bits of
+ * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
+ * mask and we can't use bit-masking to build the subclasses. What a mess.
+ *
+ * Bit layout:
+ *
+ * Bit Lock Region
+ * 16-19 XFS_IOLOCK_SHIFT dependencies
+ * 20-23 XFS_MMAPLOCK_SHIFT dependencies
+ * 24-31 XFS_ILOCK_SHIFT dependencies
+ *
+ * IOLOCK values
+ *
+ * 0-3 subclass value
+ * 4-7 PARENT subclass values
+ *
+ * MMAPLOCK values
+ *
+ * 0-3 subclass value
+ * 4-7 unused
+ *
+ * ILOCK values
+ * 0-4 subclass values
+ * 5 PARENT subclass (not nestable)
+ * 6 RTBITMAP subclass (not nestable)
+ * 7 RTSUM subclass (not nestable)
+ *
*/
-#define XFS_LOCK_PARENT 1
-#define XFS_LOCK_RTBITMAP 2
-#define XFS_LOCK_RTSUM 3
-#define XFS_LOCK_INUMORDER 4
-
-#define XFS_IOLOCK_SHIFT 16
-#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
-
-#define XFS_MMAPLOCK_SHIFT 20
-
-#define XFS_ILOCK_SHIFT 24
-#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
-
-#define XFS_IOLOCK_DEP_MASK 0x000f0000
-#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
-#define XFS_ILOCK_DEP_MASK 0xff000000
-#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
+#define XFS_IOLOCK_SHIFT 16
+#define XFS_IOLOCK_PARENT_VAL 4
+#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1)
+#define XFS_IOLOCK_DEP_MASK 0x000f0000
+#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
+
+#define XFS_MMAPLOCK_SHIFT 20
+#define XFS_MMAPLOCK_NUMORDER 0
+#define XFS_MMAPLOCK_MAX_SUBCLASS 3
+#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
+
+#define XFS_ILOCK_SHIFT 24
+#define XFS_ILOCK_PARENT_VAL 5
+#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1)
+#define XFS_ILOCK_RTBITMAP_VAL 6
+#define XFS_ILOCK_RTSUM_VAL 7
+#define XFS_ILOCK_DEP_MASK 0xff000000
+#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
+
+#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \
XFS_MMAPLOCK_DEP_MASK | \
XFS_ILOCK_DEP_MASK)
diff --git a/kernel/fs/xfs/xfs_log_recover.c b/kernel/fs/xfs/xfs_log_recover.c
index 4f5784f85..a5d03396d 100644
--- a/kernel/fs/xfs/xfs_log_recover.c
+++ b/kernel/fs/xfs/xfs_log_recover.c
@@ -1887,9 +1887,14 @@ xlog_recover_get_buf_lsn(
uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
break;
case XFS_ATTR3_RMT_MAGIC:
- lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
- uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid;
- break;
+ /*
+ * Remote attr blocks are written synchronously, rather than
+ * being logged. That means they do not contain a valid LSN
+ * (i.e. transactionally ordered) in them, and hence any time we
+ * see a buffer to replay over the top of a remote attribute
+ * block we should simply do so.
+ */
+ goto recover_immediately;
case XFS_SB_MAGIC:
lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
uuid = &((struct xfs_dsb *)blk)->sb_uuid;
diff --git a/kernel/fs/xfs/xfs_symlink.c b/kernel/fs/xfs/xfs_symlink.c
index 3df411ead..40c076523 100644
--- a/kernel/fs/xfs/xfs_symlink.c
+++ b/kernel/fs/xfs/xfs_symlink.c
@@ -104,7 +104,7 @@ xfs_readlink_bmap(
cur_chunk += sizeof(struct xfs_dsymlink_hdr);
}
- memcpy(link + offset, bp->b_addr, byte_cnt);
+ memcpy(link + offset, cur_chunk, byte_cnt);
pathlen -= byte_cnt;
offset += byte_cnt;