diff options
Diffstat (limited to 'kernel/fs')
41 files changed, 522 insertions, 238 deletions
diff --git a/kernel/fs/9p/vfs_inode.c b/kernel/fs/9p/vfs_inode.c index 703342e30..53f1e8a21 100644 --- a/kernel/fs/9p/vfs_inode.c +++ b/kernel/fs/9p/vfs_inode.c @@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/kernel/fs/9p/vfs_inode_dotl.c b/kernel/fs/9p/vfs_inode_dotl.c index 9861c7c95..4d3ecfb55 100644 --- a/kernel/fs/9p/vfs_inode_dotl.c +++ b/kernel/fs/9p/vfs_inode_dotl.c @@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/kernel/fs/btrfs/inode-map.c b/kernel/fs/btrfs/inode-map.c index f6a596d5a..d4a582ac3 100644 --- a/kernel/fs/btrfs/inode-map.c +++ b/kernel/fs/btrfs/inode-map.c @@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; + spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; struct btrfs_free_space *info; struct rb_node *n; u64 count; @@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) return; while (1) { + bool add_to_ctl = true; + + spin_lock(rbroot_lock); n = rb_first(rbroot); - if (!n) + if (!n) { + spin_unlock(rbroot_lock); break; + } info = rb_entry(n, struct btrfs_free_space, offset_index); BUG_ON(info->bitmap); /* Logic error */ if (info->offset > root->ino_cache_progress) - goto free; + add_to_ctl = false; else if (info->offset + info->bytes > root->ino_cache_progress) count = root->ino_cache_progress - info->offset + 1; else count = info->bytes; - __btrfs_add_free_space(ctl, info->offset, count); -free: rb_erase(&info->offset_index, rbroot); - kfree(info); + spin_unlock(rbroot_lock); + if (add_to_ctl) + __btrfs_add_free_space(ctl, info->offset, count); + kmem_cache_free(btrfs_free_space_cachep, info); } } diff --git a/kernel/fs/btrfs/ioctl.c b/kernel/fs/btrfs/ioctl.c index 1c22c6518..37d456a9a 100644 --- a/kernel/fs/btrfs/ioctl.c +++ b/kernel/fs/btrfs/ioctl.c @@ -2413,8 +2413,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_unlock_inode; } - d_invalidate(dentry); - down_write(&root->fs_info->subvol_sem); err = may_destroy_subvol(dest); @@ -2508,7 +2506,7 @@ out_up_write: out_unlock_inode: mutex_unlock(&inode->i_mutex); if (!err) { - shrink_dcache_sb(root->fs_info->sb); + d_invalidate(dentry); btrfs_invalidate_inodes(dest); d_delete(dentry); ASSERT(dest->send_in_progress == 0); @@ -2940,7 +2938,7 @@ out_unlock: static long btrfs_ioctl_file_extent_same(struct file *file, struct btrfs_ioctl_same_args __user *argp) { - struct btrfs_ioctl_same_args *same; + struct btrfs_ioctl_same_args *same = NULL; struct btrfs_ioctl_same_extent_info *info; struct inode *src = file_inode(file); u64 off; @@ -2970,6 +2968,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, if (IS_ERR(same)) { ret = PTR_ERR(same); + same = NULL; goto out; } @@ -3040,6 +3039,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, out: mnt_drop_write_file(file); + kfree(same); return ret; } @@ -3434,6 +3434,20 @@ process_slot: u64 trim = 0; u64 aligned_end = 0; + /* + * Don't copy an inline extent into an offset + * greater than zero. Having an inline extent + * at such an offset results in chaos as btrfs + * isn't prepared for such cases. Just skip + * this case for the same reasons as commented + * at btrfs_ioctl_clone(). + */ + if (last_dest_end > 0) { + ret = -EOPNOTSUPP; + btrfs_end_transaction(trans, root); + goto out; + } + if (off > key.offset) { skip = off - key.offset; new_key.offset += skip; diff --git a/kernel/fs/btrfs/transaction.c b/kernel/fs/btrfs/transaction.c index 5628e2525..94e909c5a 100644 --- a/kernel/fs/btrfs/transaction.c +++ b/kernel/fs/btrfs/transaction.c @@ -758,7 +758,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (!list_empty(&trans->ordered)) { spin_lock(&info->trans_lock); - list_splice(&trans->ordered, &cur_trans->pending_ordered); + list_splice_init(&trans->ordered, &cur_trans->pending_ordered); spin_unlock(&info->trans_lock); } @@ -1848,7 +1848,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } spin_lock(&root->fs_info->trans_lock); - list_splice(&trans->ordered, &cur_trans->pending_ordered); + list_splice_init(&trans->ordered, &cur_trans->pending_ordered); if (cur_trans->state >= TRANS_STATE_COMMIT_START) { spin_unlock(&root->fs_info->trans_lock); atomic_inc(&cur_trans->use_count); diff --git a/kernel/fs/btrfs/tree-log.c b/kernel/fs/btrfs/tree-log.c index d04968374..4920fceff 100644 --- a/kernel/fs/btrfs/tree-log.c +++ b/kernel/fs/btrfs/tree-log.c @@ -4161,6 +4161,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, u64 ino = btrfs_ino(inode); struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 logged_isize = 0; + bool need_log_inode_item = true; path = btrfs_alloc_path(); if (!path) @@ -4269,11 +4270,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } else { if (inode_only == LOG_INODE_ALL) fast_search = true; - ret = log_inode_item(trans, log, dst_path, inode); - if (ret) { - err = ret; - goto out_unlock; - } goto log_extents; } @@ -4296,6 +4292,9 @@ again: if (min_key.type > max_key.type) break; + if (min_key.type == BTRFS_INODE_ITEM_KEY) + need_log_inode_item = false; + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4366,6 +4365,11 @@ next_slot: log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); + if (need_log_inode_item) { + err = log_inode_item(trans, log, dst_path, inode); + if (err) + goto out_unlock; + } if (fast_search) { /* * Some ordered extents started by fsync might have completed diff --git a/kernel/fs/dcache.c b/kernel/fs/dcache.c index e1d20e11e..c1dad9243 100644 --- a/kernel/fs/dcache.c +++ b/kernel/fs/dcache.c @@ -643,7 +643,7 @@ static inline bool fast_dput(struct dentry *dentry) /* * If we have a d_op->d_delete() operation, we sould not - * let the dentry count go to zero, so use "put__or_lock". + * let the dentry count go to zero, so use "put_or_lock". */ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) return lockref_put_or_lock(&dentry->d_lockref); @@ -698,7 +698,7 @@ static inline bool fast_dput(struct dentry *dentry) */ smp_rmb(); d_flags = ACCESS_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) @@ -777,6 +777,9 @@ repeat: if (unlikely(d_unhashed(dentry))) goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) goto kill_it; diff --git a/kernel/fs/ext4/extents.c b/kernel/fs/ext4/extents.c index e003a1e81..87ba10d1d 100644 --- a/kernel/fs/ext4/extents.c +++ b/kernel/fs/ext4/extents.c @@ -503,7 +503,7 @@ __read_extent_tree_block(const char *function, unsigned int line, struct buffer_head *bh; int err; - bh = sb_getblk(inode->i_sb, pblk); + bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -1088,7 +1088,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) { err = -ENOMEM; goto cleanup; @@ -1282,7 +1282,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, if (newblock == 0) return err; - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return -ENOMEM; lock_buffer(bh); diff --git a/kernel/fs/ext4/indirect.c b/kernel/fs/ext4/indirect.c index 958824019..94ae6874c 100644 --- a/kernel/fs/ext4/indirect.c +++ b/kernel/fs/ext4/indirect.c @@ -565,7 +565,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { EXT4_ERROR_INODE(inode, "Can't allocate blocks for " "non-extent mapped inodes with bigalloc"); - return -ENOSPC; + return -EUCLEAN; } /* Set up for the direct block allocation */ diff --git a/kernel/fs/ext4/inode.c b/kernel/fs/ext4/inode.c index 0554b0b59..966c61482 100644 --- a/kernel/fs/ext4/inode.c +++ b/kernel/fs/ext4/inode.c @@ -1342,7 +1342,7 @@ static void ext4_da_page_release_reservation(struct page *page, unsigned int offset, unsigned int length) { - int to_release = 0; + int to_release = 0, contiguous_blks = 0; struct buffer_head *head, *bh; unsigned int curr_off = 0; struct inode *inode = page->mapping->host; @@ -1363,14 +1363,23 @@ static void ext4_da_page_release_reservation(struct page *page, if ((offset <= curr_off) && (buffer_delay(bh))) { to_release++; + contiguous_blks++; clear_buffer_delay(bh); + } else if (contiguous_blks) { + lblk = page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk += (curr_off >> inode->i_blkbits) - + contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); + contiguous_blks = 0; } curr_off = next_off; } while ((bh = bh->b_this_page) != head); - if (to_release) { + if (contiguous_blks) { lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - ext4_es_remove_extent(inode, lblk, to_release); + lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); } /* If we have released all the blocks belonging to a cluster, then we @@ -1701,19 +1710,32 @@ static int __ext4_journalled_writepage(struct page *page, ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); } - /* As soon as we unlock the page, it can go away, but we have - * references to buffers so we are safe */ + /* + * We need to release the page lock before we start the + * journal, so grab a reference so the page won't disappear + * out from under us. + */ + get_page(page); unlock_page(page); handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, ext4_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out; + put_page(page); + goto out_no_pagelock; } - BUG_ON(!ext4_handle_valid(handle)); + lock_page(page); + put_page(page); + if (page->mapping != mapping) { + /* The page got truncated from under us */ + ext4_journal_stop(handle); + ret = 0; + goto out; + } + if (inline_data) { BUFFER_TRACE(inode_bh, "get write access"); ret = ext4_journal_get_write_access(handle, inode_bh); @@ -1739,6 +1761,8 @@ static int __ext4_journalled_writepage(struct page *page, NULL, bput_one); ext4_set_inode_state(inode, EXT4_STATE_JDATA); out: + unlock_page(page); +out_no_pagelock: brelse(inode_bh); return ret; } @@ -4345,7 +4369,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb, int inode_size = EXT4_INODE_SIZE(sb); oi.orig_ino = orig_ino; - ino = (orig_ino & ~(inodes_per_block - 1)) + 1; + /* + * Calculate the first inode in the inode table block. Inode + * numbers are one-based. That is, the first inode in a block + * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). + */ + ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { if (ino == orig_ino) continue; diff --git a/kernel/fs/ext4/mballoc.c b/kernel/fs/ext4/mballoc.c index 8d1e60214..41260489d 100644 --- a/kernel/fs/ext4/mballoc.c +++ b/kernel/fs/ext4/mballoc.c @@ -4800,18 +4800,12 @@ do_more: /* * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed + * + * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed + * to fail. */ - retry: - new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); - if (!new_entry) { - /* - * We use a retry loop because - * ext4_free_blocks() is not allowed to fail. - */ - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; - } + new_entry = kmem_cache_alloc(ext4_free_data_cachep, + GFP_NOFS|__GFP_NOFAIL); new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; new_entry->efd_count = count_clusters; diff --git a/kernel/fs/ext4/migrate.c b/kernel/fs/ext4/migrate.c index b52374e42..6163ad21c 100644 --- a/kernel/fs/ext4/migrate.c +++ b/kernel/fs/ext4/migrate.c @@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; + ext4_lblk_t start, end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode) EXT4_FEATURE_RO_COMPAT_BIGALLOC)) return -EOPNOTSUPP; + /* + * In order to get correct extent info, force all delayed allocation + * blocks to be allocated, otherwise delayed allocation blocks may not + * be reflected and bypass the checks on extent header. + */ + if (test_opt(inode->i_sb, DELALLOC)) + ext4_alloc_da_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -650,11 +659,13 @@ int ext4_ind_migrate(struct inode *inode) goto errout; } if (eh->eh_entries == 0) - blk = len = 0; + blk = len = start = end = 0; else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - if (len > EXT4_NDIR_BLOCKS) { + start = le32_to_cpu(ex->ee_block); + end = start + len - 1; + if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; } @@ -662,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode) ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); memset(ei->i_data, 0, sizeof(ei->i_data)); - for (i=0; i < len; i++) + for (i = start; i <= end; i++) ei->i_data[i] = cpu_to_le32(blk++); ext4_mark_inode_dirty(handle, inode); errout: diff --git a/kernel/fs/ext4/super.c b/kernel/fs/ext4/super.c index ca9d4a2fe..ca12affdb 100644 --- a/kernel/fs/ext4/super.c +++ b/kernel/fs/ext4/super.c @@ -807,6 +807,7 @@ static void ext4_put_super(struct super_block *sb) dump_orphan_list(sb, sbi); J_ASSERT(list_empty(&sbi->s_orphan)); + sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { /* @@ -4943,6 +4944,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } + if (*flags & MS_LAZYTIME) + sb->s_flags |= MS_LAZYTIME; + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; diff --git a/kernel/fs/fuse/inode.c b/kernel/fs/fuse/inode.c index 18dacf9ed..708d69711 100644 --- a/kernel/fs/fuse/inode.c +++ b/kernel/fs/fuse/inode.c @@ -1026,6 +1026,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_fput; fuse_conn_init(fc); + fc->release = fuse_free_conn; fc->dev = sb->s_dev; fc->sb = sb; @@ -1040,7 +1041,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->dont_mask = 1; sb->s_flags |= MS_POSIXACL; - fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; diff --git a/kernel/fs/hpfs/super.c b/kernel/fs/hpfs/super.c index 7cd00d3a7..8685c6557 100644 --- a/kernel/fs/hpfs/super.c +++ b/kernel/fs/hpfs/super.c @@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s) } /* Filesystem error... */ -static char err_buf[1024]; - void hpfs_error(struct super_block *s, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("filesystem error: %pV", &vaf); + va_end(args); - pr_err("filesystem error: %s", err_buf); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { pr_cont("; crashing the system because you wanted it\n"); @@ -424,11 +427,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) int o; struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); - + + if (!new_opts) + return -ENOMEM; + sync_filesystem(s); *flags |= MS_NOATIME; - + hpfs_lock(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; diff --git a/kernel/fs/jbd2/checkpoint.c b/kernel/fs/jbd2/checkpoint.c index 6f691c289..9c00e2e5a 100644 --- a/kernel/fs/jbd2/checkpoint.c +++ b/kernel/fs/jbd2/checkpoint.c @@ -392,7 +392,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) unsigned long blocknr; if (is_journal_aborted(journal)) - return 1; + return -EIO; if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) return 1; @@ -407,10 +407,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * jbd2_cleanup_journal_tail() doesn't get called all that often. */ if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); - __jbd2_update_log_tail(journal, first_tid, blocknr); - return 0; + return __jbd2_update_log_tail(journal, first_tid, blocknr); } diff --git a/kernel/fs/jbd2/journal.c b/kernel/fs/jbd2/journal.c index b96bd8076..112fad9e1 100644 --- a/kernel/fs/jbd2/journal.c +++ b/kernel/fs/jbd2/journal.c @@ -885,9 +885,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, * * Requires j_checkpoint_mutex */ -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) { unsigned long freed; + int ret; BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); @@ -897,7 +898,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) * space and if we lose sb update during power failure we'd replay * old transaction with possibly newly overwritten data. */ - jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); + if (ret) + goto out; + write_lock(&journal->j_state_lock); freed = block - journal->j_tail; if (block < journal->j_tail) @@ -913,6 +917,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) journal->j_tail_sequence = tid; journal->j_tail = block; write_unlock(&journal->j_state_lock); + +out: + return ret; } /* @@ -1331,7 +1338,7 @@ static int journal_reset(journal_t *journal) return jbd2_journal_start_thread(journal); } -static void jbd2_write_superblock(journal_t *journal, int write_op) +static int jbd2_write_superblock(journal_t *journal, int write_op) { struct buffer_head *bh = journal->j_sb_buffer; journal_superblock_t *sb = journal->j_superblock; @@ -1370,7 +1377,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) printk(KERN_ERR "JBD2: Error %d detected when updating " "journal superblock for %s.\n", ret, journal->j_devname); + jbd2_journal_abort(journal, ret); } + + return ret; } /** @@ -1383,10 +1393,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) * Update a journal's superblock information about log tail and write it to * disk, waiting for the IO to complete. */ -void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, +int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, unsigned long tail_block, int write_op) { journal_superblock_t *sb = journal->j_superblock; + int ret; BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", @@ -1395,13 +1406,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, sb->s_sequence = cpu_to_be32(tail_tid); sb->s_start = cpu_to_be32(tail_block); - jbd2_write_superblock(journal, write_op); + ret = jbd2_write_superblock(journal, write_op); + if (ret) + goto out; /* Log is no longer empty */ write_lock(&journal->j_state_lock); WARN_ON(!sb->s_sequence); journal->j_flags &= ~JBD2_FLUSHED; write_unlock(&journal->j_state_lock); + +out: + return ret; } /** @@ -1950,7 +1966,14 @@ int jbd2_journal_flush(journal_t *journal) return -EIO; mutex_lock(&journal->j_checkpoint_mutex); - jbd2_cleanup_journal_tail(journal); + if (!err) { + err = jbd2_cleanup_journal_tail(journal); + if (err < 0) { + mutex_unlock(&journal->j_checkpoint_mutex); + goto out; + } + err = 0; + } /* Finally, mark the journal as really needing no recovery. * This sets s_start==0 in the underlying superblock, which is @@ -1966,7 +1989,8 @@ int jbd2_journal_flush(journal_t *journal) J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); write_unlock(&journal->j_state_lock); - return 0; +out: + return err; } /** diff --git a/kernel/fs/namespace.c b/kernel/fs/namespace.c index c246b29e4..28937028f 100644 --- a/kernel/fs/namespace.c +++ b/kernel/fs/namespace.c @@ -1354,6 +1354,36 @@ enum umount_tree_flags { UMOUNT_PROPAGATE = 2, UMOUNT_CONNECTED = 4, }; + +static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how) +{ + /* Leaving mounts connected is only valid for lazy umounts */ + if (how & UMOUNT_SYNC) + return true; + + /* A mount without a parent has nothing to be connected to */ + if (!mnt_has_parent(mnt)) + return true; + + /* Because the reference counting rules change when mounts are + * unmounted and connected, umounted mounts may not be + * connected to mounted mounts. + */ + if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) + return true; + + /* Has it been requested that the mount remain connected? */ + if (how & UMOUNT_CONNECTED) + return false; + + /* Is the mount locked such that it needs to remain connected? */ + if (IS_MNT_LOCKED(mnt)) + return false; + + /* By default disconnect the mount */ + return true; +} + /* * mount_lock must be held * namespace_sem must be held for write @@ -1391,10 +1421,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) if (how & UMOUNT_SYNC) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - disconnect = !(((how & UMOUNT_CONNECTED) && - mnt_has_parent(p) && - (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) || - IS_MNT_LOCKED_AND_LAZY(p)); + disconnect = disconnect_mount(p, how); pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, disconnect ? &unmounted : NULL); @@ -1531,11 +1558,8 @@ void __detach_mounts(struct dentry *dentry) while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { - struct mount *p, *tmp; - list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { - hlist_add_head(&p->mnt_umount.s_list, &unmounted); - umount_mnt(p); - } + hlist_add_head(&mnt->mnt_umount.s_list, &unmounted); + umount_mnt(mnt); } else umount_tree(mnt, UMOUNT_CONNECTED); } diff --git a/kernel/fs/nfs/flexfilelayout/flexfilelayout.c b/kernel/fs/nfs/flexfilelayout/flexfilelayout.c index 7d05089e5..6f5f0f425 100644 --- a/kernel/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/kernel/fs/nfs/flexfilelayout/flexfilelayout.c @@ -631,7 +631,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) nfs_direct_set_resched_writes(hdr->dreq); /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = 0; + hdr->good_bytes = hdr->args.count; } return; } diff --git a/kernel/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/kernel/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 77a2d026a..f13e1969e 100644 --- a/kernel/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/kernel/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, __func__, PTR_ERR(cred)); return PTR_ERR(cred); } else { - mirror->cred = cred; + if (cmpxchg(&mirror->cred, NULL, cred)) + put_rpccred(cred); } } return 0; @@ -386,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); if (ds->ds_clp) - goto out; + goto out_update_creds; flavor = nfs4_ff_layout_choose_authflavor(mirror); @@ -430,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, } } } - +out_update_creds: if (ff_layout_update_mirror_cred(mirror, ds)) ds = NULL; out: diff --git a/kernel/fs/nfs/inode.c b/kernel/fs/nfs/inode.c index f734562c6..5d25b9d97 100644 --- a/kernel/fs/nfs/inode.c +++ b/kernel/fs/nfs/inode.c @@ -1242,9 +1242,11 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->nrequests == 0) + if (cur_size != new_isize) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } + if (nfsi->nrequests != 0) + invalid &= ~NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? */ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) @@ -1682,8 +1684,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_PAGECACHE; + | NFS_INO_INVALID_ACL; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; @@ -1715,7 +1716,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - invalid &= ~NFS_INO_REVAL_PAGECACHE; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", diff --git a/kernel/fs/nfs/nfs3xdr.c b/kernel/fs/nfs/nfs3xdr.c index 53852a4bd..9b04c2e6f 100644 --- a/kernel/fs/nfs/nfs3xdr.c +++ b/kernel/fs/nfs/nfs3xdr.c @@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, if (args->npages != 0) xdr_write_pages(xdr, args->pages, 0, args->len); else - xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); + xdr_reserve_space(xdr, args->len); error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? diff --git a/kernel/fs/nfs/nfs4proc.c b/kernel/fs/nfs/nfs4proc.c index 55e1e3af2..d3f205126 100644 --- a/kernel/fs/nfs/nfs4proc.c +++ b/kernel/fs/nfs/nfs4proc.c @@ -1204,12 +1204,15 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state, static void nfs_resync_open_stateid_locked(struct nfs4_state *state) { + if (!(state->n_wronly || state->n_rdonly || state->n_rdwr)) + return; if (state->n_wronly) set_bit(NFS_O_WRONLY_STATE, &state->flags); if (state->n_rdonly) set_bit(NFS_O_RDONLY_STATE, &state->flags); if (state->n_rdwr) set_bit(NFS_O_RDWR_STATE, &state->flags); + set_bit(NFS_OPEN_STATE, &state->flags); } static void nfs_clear_open_stateid_locked(struct nfs4_state *state, diff --git a/kernel/fs/nfs/nfs4state.c b/kernel/fs/nfs/nfs4state.c index 2782cfca2..ddef1dc80 100644 --- a/kernel/fs/nfs/nfs4state.c +++ b/kernel/fs/nfs/nfs4state.c @@ -1482,6 +1482,8 @@ restart: spin_unlock(&state->state_lock); } nfs4_put_open_state(state); + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + &state->flags); spin_lock(&sp->so_lock); goto restart; } diff --git a/kernel/fs/nfs/pagelist.c b/kernel/fs/nfs/pagelist.c index 282b39369..7b4552678 100644 --- a/kernel/fs/nfs/pagelist.c +++ b/kernel/fs/nfs/pagelist.c @@ -1110,8 +1110,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) nfs_list_remove_request(req); if (__nfs_pageio_add_request(desc, req)) continue; - if (desc->pg_error < 0) + if (desc->pg_error < 0) { + list_splice_tail(&head, &mirror->pg_list); + mirror->pg_recoalesce = 1; return 0; + } break; } } while (mirror->pg_recoalesce); diff --git a/kernel/fs/nfs/pnfs.c b/kernel/fs/nfs/pnfs.c index 230606243..d47c18868 100644 --- a/kernel/fs/nfs/pnfs.c +++ b/kernel/fs/nfs/pnfs.c @@ -1821,6 +1821,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); + set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); @@ -1865,6 +1866,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } static enum pnfs_try_status @@ -1979,6 +1981,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } /* diff --git a/kernel/fs/nfs/write.c b/kernel/fs/nfs/write.c index dfc19f157..daf355642 100644 --- a/kernel/fs/nfs/write.c +++ b/kernel/fs/nfs/write.c @@ -1289,6 +1289,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); nfs_unlock_request(req); nfs_end_page_writeback(req); nfs_release_request(req); diff --git a/kernel/fs/nfsd/nfs4state.c b/kernel/fs/nfsd/nfs4state.c index 039f9c8a9..6e13504f7 100644 --- a/kernel/fs/nfsd/nfs4state.c +++ b/kernel/fs/nfsd/nfs4state.c @@ -4397,9 +4397,9 @@ laundromat_main(struct work_struct *laundry) queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } -static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) +static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { - if (!fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) + if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } @@ -4574,20 +4574,48 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, return nfs_ok; } +static struct file * +nfs4_find_file(struct nfs4_stid *s, int flags) +{ + switch (s->sc_type) { + case NFS4_DELEG_STID: + if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file)) + return NULL; + return get_file(s->sc_file->fi_deleg_file); + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + if (flags & RD_STATE) + return find_readable_file(s->sc_file); + else + return find_writeable_file(s->sc_file); + break; + } + + return NULL; +} + +static __be32 +nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags) +{ + __be32 status; + + status = nfsd4_check_openowner_confirmed(ols); + if (status) + return status; + return nfs4_check_openmode(ols, flags); +} + /* -* Checks for stateid operations -*/ + * Checks for stateid operations + */ __be32 nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filpp) { - struct nfs4_stid *s; - struct nfs4_ol_stateid *stp = NULL; - struct nfs4_delegation *dp = NULL; - struct svc_fh *current_fh = &cstate->current_fh; - struct inode *ino = d_inode(current_fh->fh_dentry); + struct svc_fh *fhp = &cstate->current_fh; + struct inode *ino = d_inode(fhp->fh_dentry); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct file *file = NULL; + struct nfs4_stid *s; __be32 status; if (filpp) @@ -4597,60 +4625,39 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, return nfserr_grace; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return check_special_stateids(net, current_fh, stateid, flags); + return check_special_stateids(net, fhp, stateid, flags); status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, nn); if (status) return status; - status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); + status = check_stateid_generation(stateid, &s->sc_stateid, + nfsd4_has_session(cstate)); if (status) goto out; + switch (s->sc_type) { case NFS4_DELEG_STID: - dp = delegstateid(s); - status = nfs4_check_delegmode(dp, flags); - if (status) - goto out; - if (filpp) { - file = dp->dl_stid.sc_file->fi_deleg_file; - if (!file) { - WARN_ON_ONCE(1); - status = nfserr_serverfault; - goto out; - } - get_file(file); - } + status = nfs4_check_delegmode(delegstateid(s), flags); break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: - stp = openlockstateid(s); - status = nfs4_check_fh(current_fh, stp); - if (status) - goto out; - status = nfsd4_check_openowner_confirmed(stp); - if (status) - goto out; - status = nfs4_check_openmode(stp, flags); - if (status) - goto out; - if (filpp) { - struct nfs4_file *fp = stp->st_stid.sc_file; - - if (flags & RD_STATE) - file = find_readable_file(fp); - else - file = find_writeable_file(fp); - } + status = nfs4_check_olstateid(fhp, openlockstateid(s), flags); break; default: status = nfserr_bad_stateid; + break; + } + if (status) goto out; + status = nfs4_check_fh(fhp, s); + + if (!status && filpp) { + *filpp = nfs4_find_file(s, flags); + if (!*filpp) + status = nfserr_serverfault; } - status = nfs_ok; - if (file) - *filpp = file; out: nfs4_put_stid(s); return status; @@ -4754,7 +4761,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status) return status; - return nfs4_check_fh(current_fh, stp); + return nfs4_check_fh(current_fh, &stp->st_stid); } /* diff --git a/kernel/fs/nfsd/nfs4xdr.c b/kernel/fs/nfsd/nfs4xdr.c index 158badf94..d4d84451e 100644 --- a/kernel/fs/nfsd/nfs4xdr.c +++ b/kernel/fs/nfsd/nfs4xdr.c @@ -2142,6 +2142,7 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#define WORD2_ABSENT_FS_ATTRS 0 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 @@ -2170,7 +2171,7 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, { return 0; } #endif -static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) +static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32 *rdattr_err) { /* As per referral draft: */ if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS || @@ -2183,6 +2184,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) } *bmval0 &= WORD0_ABSENT_FS_ATTRS; *bmval1 &= WORD1_ABSENT_FS_ATTRS; + *bmval2 &= WORD2_ABSENT_FS_ATTRS; return 0; } @@ -2246,8 +2248,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); if (exp->ex_fslocs.migrated) { - BUG_ON(bmval[2]); - status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); + status = fattr_handle_absent_fs(&bmval0, &bmval1, &bmval2, &rdattr_err); if (status) goto out; } @@ -2290,8 +2291,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || - bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) || + bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { err = security_inode_getsecctx(d_inode(dentry), &context, &contextlen); contextsupport = (err == 0); diff --git a/kernel/fs/notify/mark.c b/kernel/fs/notify/mark.c index 92e48c70f..39ddcaf09 100644 --- a/kernel/fs/notify/mark.c +++ b/kernel/fs/notify/mark.c @@ -412,16 +412,36 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags) { struct fsnotify_mark *lmark, *mark; + LIST_HEAD(to_free); + /* + * We have to be really careful here. Anytime we drop mark_mutex, e.g. + * fsnotify_clear_marks_by_inode() can come and free marks. Even in our + * to_free list so we have to use mark_mutex even when accessing that + * list. And freeing mark requires us to drop mark_mutex. So we can + * reliably free only the first mark in the list. That's why we first + * move marks to free to to_free list in one go and then free marks in + * to_free list one by one. + */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->flags & flags) { - fsnotify_get_mark(mark); - fsnotify_destroy_mark_locked(mark, group); - fsnotify_put_mark(mark); - } + if (mark->flags & flags) + list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); + + while (1) { + mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); + if (list_empty(&to_free)) { + mutex_unlock(&group->mark_mutex); + break; + } + mark = list_first_entry(&to_free, struct fsnotify_mark, g_list); + fsnotify_get_mark(mark); + fsnotify_destroy_mark_locked(mark, group); + mutex_unlock(&group->mark_mutex); + fsnotify_put_mark(mark); + } } /* diff --git a/kernel/fs/ocfs2/aops.c b/kernel/fs/ocfs2/aops.c index f906a250d..9ea701270 100644 --- a/kernel/fs/ocfs2/aops.c +++ b/kernel/fs/ocfs2/aops.c @@ -686,7 +686,7 @@ static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { u64 s = i_size_read(inode); - sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) + + sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) + (do_div(s, osb->s_clustersize) >> 9); ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, @@ -911,7 +911,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)); ret = blkdev_issue_zeroout(osb->sb->s_bdev, - p_cpos << (osb->s_clustersize_bits - 9), + (u64)p_cpos << (osb->s_clustersize_bits - 9), zero_len_head >> 9, GFP_NOFS, false); if (ret < 0) mlog_errno(ret); diff --git a/kernel/fs/ocfs2/dlmglue.c b/kernel/fs/ocfs2/dlmglue.c index 8b23aa2f5..23157e40d 100644 --- a/kernel/fs/ocfs2/dlmglue.c +++ b/kernel/fs/ocfs2/dlmglue.c @@ -4025,9 +4025,13 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) osb->dc_work_sequence = osb->dc_wake_sequence; processed = osb->blocked_lock_count; - while (processed) { - BUG_ON(list_empty(&osb->blocked_lock_list)); - + /* + * blocked lock processing in this loop might call iput which can + * remove items off osb->blocked_lock_list. Downconvert up to + * 'processed' number of locks, but stop short if we had some + * removed in ocfs2_mark_lockres_freeing when downconverting. + */ + while (processed && !list_empty(&osb->blocked_lock_list)) { lockres = list_entry(osb->blocked_lock_list.next, struct ocfs2_lock_res, l_blocked_list); list_del_init(&lockres->l_blocked_list); diff --git a/kernel/fs/overlayfs/readdir.c b/kernel/fs/overlayfs/readdir.c index 907870e81..70e9af551 100644 --- a/kernel/fs/overlayfs/readdir.c +++ b/kernel/fs/overlayfs/readdir.c @@ -23,6 +23,7 @@ struct ovl_cache_entry { u64 ino; struct list_head l_node; struct rb_node node; + struct ovl_cache_entry *next_maybe_whiteout; bool is_whiteout; char name[]; }; @@ -39,7 +40,7 @@ struct ovl_readdir_data { struct rb_root root; struct list_head *list; struct list_head middle; - struct dentry *dir; + struct ovl_cache_entry *first_maybe_whiteout; int count; int err; }; @@ -79,7 +80,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, return NULL; } -static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, +static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, const char *name, int len, u64 ino, unsigned int d_type) { @@ -98,29 +99,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, p->is_whiteout = false; if (d_type == DT_CHR) { - struct dentry *dentry; - const struct cred *old_cred; - struct cred *override_cred; - - override_cred = prepare_creds(); - if (!override_cred) { - kfree(p); - return NULL; - } - - /* - * CAP_DAC_OVERRIDE for lookup - */ - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - old_cred = override_creds(override_cred); - - dentry = lookup_one_len(name, dir, len); - if (!IS_ERR(dentry)) { - p->is_whiteout = ovl_is_whiteout(dentry); - dput(dentry); - } - revert_creds(old_cred); - put_cred(override_cred); + p->next_maybe_whiteout = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p; } return p; } @@ -148,7 +128,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, return 0; } - p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type); + p = ovl_cache_entry_new(rdd, name, len, ino, d_type); if (p == NULL) return -ENOMEM; @@ -169,7 +149,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, if (p) { list_move_tail(&p->l_node, &rdd->middle); } else { - p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type); + p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); if (p == NULL) rdd->err = -ENOMEM; else @@ -219,6 +199,43 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name, return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); } +static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) +{ + int err; + struct ovl_cache_entry *p; + struct dentry *dentry; + const struct cred *old_cred; + struct cred *override_cred; + + override_cred = prepare_creds(); + if (!override_cred) + return -ENOMEM; + + /* + * CAP_DAC_OVERRIDE for lookup + */ + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + old_cred = override_creds(override_cred); + + err = mutex_lock_killable(&dir->d_inode->i_mutex); + if (!err) { + while (rdd->first_maybe_whiteout) { + p = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p->next_maybe_whiteout; + dentry = lookup_one_len(p->name, dir, p->len); + if (!IS_ERR(dentry)) { + p->is_whiteout = ovl_is_whiteout(dentry); + dput(dentry); + } + } + mutex_unlock(&dir->d_inode->i_mutex); + } + revert_creds(old_cred); + put_cred(override_cred); + + return err; +} + static inline int ovl_dir_read(struct path *realpath, struct ovl_readdir_data *rdd) { @@ -229,7 +246,7 @@ static inline int ovl_dir_read(struct path *realpath, if (IS_ERR(realfile)) return PTR_ERR(realfile); - rdd->dir = realpath->dentry; + rdd->first_maybe_whiteout = NULL; rdd->ctx.pos = 0; do { rdd->count = 0; @@ -238,6 +255,10 @@ static inline int ovl_dir_read(struct path *realpath, if (err >= 0) err = rdd->err; } while (!err && rdd->count); + + if (!err && rdd->first_maybe_whiteout) + err = ovl_check_whiteouts(realpath->dentry, rdd); + fput(realfile); return err; diff --git a/kernel/fs/pnode.h b/kernel/fs/pnode.h index 7114ce6e6..0fcdbe7ca 100644 --- a/kernel/fs/pnode.h +++ b/kernel/fs/pnode.h @@ -20,8 +20,6 @@ #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) -#define IS_MNT_LOCKED_AND_LAZY(m) \ - (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 diff --git a/kernel/fs/signalfd.c b/kernel/fs/signalfd.c index 7e412ad74..270221fce 100644 --- a/kernel/fs/signalfd.c +++ b/kernel/fs/signalfd.c @@ -121,8 +121,9 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. */ - if (kinfo->si_code == BUS_MCEERR_AR || - kinfo->si_code == BUS_MCEERR_AO) + if (kinfo->si_signo == SIGBUS && + (kinfo->si_code == BUS_MCEERR_AR || + kinfo->si_code == BUS_MCEERR_AO)) err |= __put_user((short) kinfo->si_addr_lsb, &uinfo->ssi_addr_lsb); #endif diff --git a/kernel/fs/xfs/libxfs/xfs_attr_remote.c b/kernel/fs/xfs/libxfs/xfs_attr_remote.c index 20de88d1b..dd714037c 100644 --- a/kernel/fs/xfs/libxfs/xfs_attr_remote.c +++ b/kernel/fs/xfs/libxfs/xfs_attr_remote.c @@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + int blksize = mp->m_attr_geo->blksize; char *ptr; int len; xfs_daddr_t bno; - int blksize = mp->m_attr_geo->blksize; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify( ASSERT(len >= blksize); while (len > 0) { + struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; + if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { xfs_buf_ioerror(bp, -EFSCORRUPTED); xfs_verifier_error(bp); return; } - if (bip) { - struct xfs_attr3_rmt_hdr *rmt; - rmt = (struct xfs_attr3_rmt_hdr *)ptr; - rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); + /* + * Ensure we aren't writing bogus LSNs to disk. See + * xfs_attr3_rmt_hdr_set() for the explanation. + */ + if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { + xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp); + return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set( rmt->rm_owner = cpu_to_be64(ino); rmt->rm_blkno = cpu_to_be64(bno); + /* + * Remote attribute blocks are written synchronously, so we don't + * have an LSN that we can stamp in them that makes any sense to log + * recovery. To ensure that log recovery handles overwrites of these + * blocks sanely (i.e. once they've been freed and reallocated as some + * other type of metadata) we need to ensure that the LSN has a value + * that tells log recovery to ignore the LSN and overwrite the buffer + * with whatever is in it's log. To do this, we use the magic + * NULLCOMMITLSN to indicate that the LSN is invalid. + */ + rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); + return sizeof(struct xfs_attr3_rmt_hdr); } @@ -434,14 +451,21 @@ xfs_attr_rmtval_set( /* * Allocate a single extent, up to the size of the value. + * + * Note that we have to consider this a data allocation as we + * write the remote attribute without logging the contents. + * Hence we must ensure that we aren't using blocks that are on + * the busy list so that we don't overwrite blocks which have + * recently been freed but their transactions are not yet + * committed to disk. If we overwrite the contents of a busy + * extent and then crash then the block may not contain the + * correct metadata after log recovery occurs. */ xfs_bmap_init(args->flist, args->firstblock); nmap = 1; error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, - XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, - args->firstblock, args->total, &map, &nmap, - args->flist); + blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, + args->total, &map, &nmap, args->flist); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); diff --git a/kernel/fs/xfs/xfs_attr_inactive.c b/kernel/fs/xfs/xfs_attr_inactive.c index 3fbf167cf..73e75a87a 100644 --- a/kernel/fs/xfs/xfs_attr_inactive.c +++ b/kernel/fs/xfs/xfs_attr_inactive.c @@ -435,8 +435,14 @@ xfs_attr_inactive( */ xfs_trans_ijoin(trans, dp, 0); - /* invalidate and truncate the attribute fork extents */ - if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { + /* + * Invalidate and truncate the attribute fork extents. Make sure the + * fork actually has attributes as otherwise the invalidation has no + * blocks to read and returns an error. In this case, just do the fork + * removal below. + */ + if (xfs_inode_hasattr(dp) && + dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_cancel; diff --git a/kernel/fs/xfs/xfs_inode.c b/kernel/fs/xfs/xfs_inode.c index 539a85fdd..fec4bfba0 100644 --- a/kernel/fs/xfs/xfs_inode.c +++ b/kernel/fs/xfs/xfs_inode.c @@ -164,7 +164,7 @@ xfs_ilock( (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); if (lock_flags & XFS_IOLOCK_EXCL) mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); @@ -212,7 +212,7 @@ xfs_ilock_nowait( (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); if (lock_flags & XFS_IOLOCK_EXCL) { if (!mrtryupdate(&ip->i_iolock)) @@ -281,7 +281,7 @@ xfs_iunlock( (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); ASSERT(lock_flags != 0); if (lock_flags & XFS_IOLOCK_EXCL) @@ -364,30 +364,38 @@ int xfs_lock_delays; /* * Bump the subclass so xfs_lock_inodes() acquires each lock with a different - * value. This shouldn't be called for page fault locking, but we also need to - * ensure we don't overrun the number of lockdep subclasses for the iolock or - * mmaplock as that is limited to 12 by the mmap lock lockdep annotations. + * value. This can be called for any type of inode lock combination, including + * parent locking. Care must be taken to ensure we don't overrun the subclass + * storage fields in the class mask we build. */ static inline int xfs_lock_inumorder(int lock_mode, int subclass) { + int class = 0; + + ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP | + XFS_ILOCK_RTSUM))); + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { - ASSERT(subclass + XFS_LOCK_INUMORDER < - (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT))); - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; + ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); + ASSERT(subclass + XFS_IOLOCK_PARENT_VAL < + MAX_LOCKDEP_SUBCLASSES); + class += subclass << XFS_IOLOCK_SHIFT; + if (lock_mode & XFS_IOLOCK_PARENT) + class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT; } if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { - ASSERT(subclass + XFS_LOCK_INUMORDER < - (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT))); - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << - XFS_MMAPLOCK_SHIFT; + ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS); + class += subclass << XFS_MMAPLOCK_SHIFT; } - if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) { + ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS); + class += subclass << XFS_ILOCK_SHIFT; + } - return lock_mode; + return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class; } /* @@ -399,6 +407,11 @@ xfs_lock_inumorder(int lock_mode, int subclass) * transaction (such as truncate). This can result in deadlock since the long * running trans might need to wait for the inode we just locked in order to * push the tail and free space in the log. + * + * xfs_lock_inodes() can only be used to lock one type of lock at a time - + * the iolock, the mmaplock or the ilock, but not more than one at a time. If we + * lock more than one at a time, lockdep will report false positives saying we + * have violated locking orders. */ void xfs_lock_inodes( @@ -409,8 +422,29 @@ xfs_lock_inodes( int attempts = 0, i, j, try_lock; xfs_log_item_t *lp; - /* currently supports between 2 and 5 inodes */ + /* + * Currently supports between 2 and 5 inodes with exclusive locking. We + * support an arbitrary depth of locking here, but absolute limits on + * inodes depend on the the type of locking and the limits placed by + * lockdep annotations in xfs_lock_inumorder. These are all checked by + * the asserts. + */ ASSERT(ips && inodes >= 2 && inodes <= 5); + ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL | + XFS_ILOCK_EXCL)); + ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | + XFS_ILOCK_SHARED))); + ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) || + inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1); + ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || + inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); + ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || + inodes <= XFS_ILOCK_MAX_SUBCLASS + 1); + + if (lock_mode & XFS_IOLOCK_EXCL) { + ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL))); + } else if (lock_mode & XFS_MMAPLOCK_EXCL) + ASSERT(!(lock_mode & XFS_ILOCK_EXCL)); try_lock = 0; i = 0; diff --git a/kernel/fs/xfs/xfs_inode.h b/kernel/fs/xfs/xfs_inode.h index 8f22d2036..ee26a603c 100644 --- a/kernel/fs/xfs/xfs_inode.h +++ b/kernel/fs/xfs/xfs_inode.h @@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) * Flags for lockdep annotations. * * XFS_LOCK_PARENT - for directory operations that require locking a - * parent directory inode and a child entry inode. The parent gets locked - * with this flag so it gets a lockdep subclass of 1 and the child entry - * lock will have a lockdep subclass of 0. + * parent directory inode and a child entry inode. IOLOCK requires nesting, + * MMAPLOCK does not support this class, ILOCK requires a single subclass + * to differentiate parent from child. * * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary * inodes do not participate in the normal lock order, and thus have their @@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) * XFS_LOCK_INUMORDER - for locking several inodes at the some time * with xfs_lock_inodes(). This flag is used as the starting subclass * and each subsequent lock acquired will increment the subclass by one. - * So the first lock acquired will have a lockdep subclass of 4, the - * second lock will have a lockdep subclass of 5, and so on. It is - * the responsibility of the class builder to shift this to the correct - * portion of the lock_mode lockdep mask. + * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly + * limited to the subclasses we can represent via nesting. We need at least + * 5 inodes nest depth for the ILOCK through rename, and we also have to support + * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP + * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all + * 8 subclasses supported by lockdep. + * + * This also means we have to number the sub-classes in the lowest bits of + * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep + * mask and we can't use bit-masking to build the subclasses. What a mess. + * + * Bit layout: + * + * Bit Lock Region + * 16-19 XFS_IOLOCK_SHIFT dependencies + * 20-23 XFS_MMAPLOCK_SHIFT dependencies + * 24-31 XFS_ILOCK_SHIFT dependencies + * + * IOLOCK values + * + * 0-3 subclass value + * 4-7 PARENT subclass values + * + * MMAPLOCK values + * + * 0-3 subclass value + * 4-7 unused + * + * ILOCK values + * 0-4 subclass values + * 5 PARENT subclass (not nestable) + * 6 RTBITMAP subclass (not nestable) + * 7 RTSUM subclass (not nestable) + * */ -#define XFS_LOCK_PARENT 1 -#define XFS_LOCK_RTBITMAP 2 -#define XFS_LOCK_RTSUM 3 -#define XFS_LOCK_INUMORDER 4 - -#define XFS_IOLOCK_SHIFT 16 -#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) - -#define XFS_MMAPLOCK_SHIFT 20 - -#define XFS_ILOCK_SHIFT 24 -#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) -#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) -#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) - -#define XFS_IOLOCK_DEP_MASK 0x000f0000 -#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 -#define XFS_ILOCK_DEP_MASK 0xff000000 -#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \ +#define XFS_IOLOCK_SHIFT 16 +#define XFS_IOLOCK_PARENT_VAL 4 +#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1) +#define XFS_IOLOCK_DEP_MASK 0x000f0000 +#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT) + +#define XFS_MMAPLOCK_SHIFT 20 +#define XFS_MMAPLOCK_NUMORDER 0 +#define XFS_MMAPLOCK_MAX_SUBCLASS 3 +#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 + +#define XFS_ILOCK_SHIFT 24 +#define XFS_ILOCK_PARENT_VAL 5 +#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1) +#define XFS_ILOCK_RTBITMAP_VAL 6 +#define XFS_ILOCK_RTSUM_VAL 7 +#define XFS_ILOCK_DEP_MASK 0xff000000 +#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT) + +#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \ XFS_MMAPLOCK_DEP_MASK | \ XFS_ILOCK_DEP_MASK) diff --git a/kernel/fs/xfs/xfs_log_recover.c b/kernel/fs/xfs/xfs_log_recover.c index 4f5784f85..a5d03396d 100644 --- a/kernel/fs/xfs/xfs_log_recover.c +++ b/kernel/fs/xfs/xfs_log_recover.c @@ -1887,9 +1887,14 @@ xlog_recover_get_buf_lsn( uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; break; case XFS_ATTR3_RMT_MAGIC: - lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); - uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid; - break; + /* + * Remote attr blocks are written synchronously, rather than + * being logged. That means they do not contain a valid LSN + * (i.e. transactionally ordered) in them, and hence any time we + * see a buffer to replay over the top of a remote attribute + * block we should simply do so. + */ + goto recover_immediately; case XFS_SB_MAGIC: lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); uuid = &((struct xfs_dsb *)blk)->sb_uuid; diff --git a/kernel/fs/xfs/xfs_symlink.c b/kernel/fs/xfs/xfs_symlink.c index 3df411ead..40c076523 100644 --- a/kernel/fs/xfs/xfs_symlink.c +++ b/kernel/fs/xfs/xfs_symlink.c @@ -104,7 +104,7 @@ xfs_readlink_bmap( cur_chunk += sizeof(struct xfs_dsymlink_hdr); } - memcpy(link + offset, bp->b_addr, byte_cnt); + memcpy(link + offset, cur_chunk, byte_cnt); pathlen -= byte_cnt; offset += byte_cnt; |