diff options
Diffstat (limited to 'kernel/fs/ext4')
37 files changed, 2803 insertions, 2322 deletions
diff --git a/kernel/fs/ext4/Kconfig b/kernel/fs/ext4/Kconfig index 024f2284d..b46e9fc64 100644 --- a/kernel/fs/ext4/Kconfig +++ b/kernel/fs/ext4/Kconfig @@ -1,5 +1,38 @@ +# Ext3 configs are here for backward compatibility with old configs which may +# have EXT3_FS set but not EXT4_FS set and thus would result in non-bootable +# kernels after the removal of ext3 driver. +config EXT3_FS + tristate "The Extended 3 (ext3) filesystem" + # These must match EXT4_FS selects... + select EXT4_FS + select JBD2 + select CRC16 + select CRYPTO + select CRYPTO_CRC32C + help + This config option is here only for backward compatibility. ext3 + filesystem is now handled by the ext4 driver. + +config EXT3_FS_POSIX_ACL + bool "Ext3 POSIX Access Control Lists" + depends on EXT3_FS + select EXT4_FS_POSIX_ACL + select FS_POSIX_ACL + help + This config option is here only for backward compatibility. ext3 + filesystem is now handled by the ext4 driver. + +config EXT3_FS_SECURITY + bool "Ext3 Security Labels" + depends on EXT3_FS + select EXT4_FS_SECURITY + help + This config option is here only for backward compatibility. ext3 + filesystem is now handled by the ext4 driver. + config EXT4_FS tristate "The Extended 4 (ext4) filesystem" + # Please update EXT3_FS selects when changing these select JBD2 select CRC16 select CRYPTO @@ -16,26 +49,27 @@ config EXT4_FS up fsck time. For more information, please see the web pages at http://ext4.wiki.kernel.org. - The ext4 filesystem will support mounting an ext3 - filesystem; while there will be some performance gains from - the delayed allocation and inode table readahead, the best - performance gains will require enabling ext4 features in the - filesystem, or formatting a new filesystem as an ext4 - filesystem initially. + The ext4 filesystem supports mounting an ext3 filesystem; while there + are some performance gains from the delayed allocation and inode + table readahead, the best performance gains require enabling ext4 + features in the filesystem using tune2fs, or formatting a new + filesystem as an ext4 filesystem initially. Without explicit enabling + of ext4 features, the on disk filesystem format stays fully backward + compatible. To compile this file system support as a module, choose M here. The module will be called ext4. If unsure, say N. -config EXT4_USE_FOR_EXT23 - bool "Use ext4 for ext2/ext3 file systems" +config EXT4_USE_FOR_EXT2 + bool "Use ext4 for ext2 file systems" depends on EXT4_FS - depends on EXT3_FS=n || EXT2_FS=n + depends on EXT2_FS=n default y help - Allow the ext4 file system driver code to be used for ext2 or - ext3 file system mounts. This allows users to reduce their + Allow the ext4 file system driver code to be used for ext2 + file system mounts. This allows users to reduce their compiled kernel size by using one file system driver for ext2, ext3, and ext4 file systems. @@ -72,6 +106,7 @@ config EXT4_ENCRYPTION select CRYPTO_ECB select CRYPTO_XTS select CRYPTO_CTS + select CRYPTO_CTR select CRYPTO_SHA256 select KEYS select ENCRYPTED_KEYS diff --git a/kernel/fs/ext4/Makefile b/kernel/fs/ext4/Makefile index 75285ea9a..f52cf54f0 100644 --- a/kernel/fs/ext4/Makefile +++ b/kernel/fs/ext4/Makefile @@ -8,7 +8,7 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ - xattr_trusted.o inline.o readpage.o + xattr_trusted.o inline.o readpage.o sysfs.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o diff --git a/kernel/fs/ext4/balloc.c b/kernel/fs/ext4/balloc.c index 955bf49a7..fe1f50fe7 100644 --- a/kernel/fs/ext4/balloc.c +++ b/kernel/fs/ext4/balloc.c @@ -203,7 +203,7 @@ static int ext4_init_block_bitmap(struct super_block *sb, count); } set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return -EIO; + return -EFSBADCRC; } memset(bh->b_data, 0, sb->s_blocksize); @@ -213,7 +213,7 @@ static int ext4_init_block_bitmap(struct super_block *sb, start = ext4_group_first_block_no(sb, block_group); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) + if (ext4_has_feature_flex_bg(sb)) flex_bg = 1; /* Set bits for block and inode bitmaps, and inode table */ @@ -322,7 +322,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, ext4_fsblk_t blk; ext4_fsblk_t group_first_block; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { + if (ext4_has_feature_flex_bg(sb)) { /* with FLEX_BG, the inode/block bitmaps and itable * blocks may not be in the group at all * so the bitmap validation will be skipped for those groups @@ -360,42 +360,45 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, return 0; } -static void ext4_validate_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - ext4_group_t block_group, - struct buffer_head *bh) +static int ext4_validate_block_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + ext4_group_t block_group, + struct buffer_head *bh) { ext4_fsblk_t blk; struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); struct ext4_sb_info *sbi = EXT4_SB(sb); if (buffer_verified(bh)) - return; + return 0; + if (EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; ext4_lock_group(sb, block_group); - blk = ext4_valid_block_bitmap(sb, desc, block_group, bh); - if (unlikely(blk != 0)) { + if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, + desc, bh))) { ext4_unlock_group(sb, block_group); - ext4_error(sb, "bg %u: block %llu: invalid block bitmap", - block_group, blk); + ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, grp->bb_free); set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); - return; + return -EFSBADCRC; } - if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, - desc, bh))) { + blk = ext4_valid_block_bitmap(sb, desc, block_group, bh); + if (unlikely(blk != 0)) { ext4_unlock_group(sb, block_group); - ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); + ext4_error(sb, "bg %u: block %llu: invalid block bitmap", + block_group, blk); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, grp->bb_free); set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); - return; + return -EFSCORRUPTED; } set_buffer_verified(bh); ext4_unlock_group(sb, block_group); + return 0; } /** @@ -414,17 +417,18 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) struct ext4_group_desc *desc; struct buffer_head *bh; ext4_fsblk_t bitmap_blk; + int err; desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) - return NULL; + return ERR_PTR(-EFSCORRUPTED); bitmap_blk = ext4_block_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { ext4_error(sb, "Cannot get buffer for block bitmap - " "block_group = %u, block_bitmap = %llu", block_group, bitmap_blk); - return NULL; + return ERR_PTR(-ENOMEM); } if (bitmap_uptodate(bh)) @@ -437,16 +441,17 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - int err; - err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) - ext4_error(sb, "Checksum bad for grp %u", block_group); - return bh; + if (err) { + ext4_error(sb, "Failed to init block bitmap for group " + "%u: %d", block_group, err); + goto out; + } + goto verify; } ext4_unlock_group(sb, block_group); if (buffer_uptodate(bh)) { @@ -468,11 +473,13 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) submit_bh(READ | REQ_META | REQ_PRIO, bh); return bh; verify: - ext4_validate_block_bitmap(sb, desc, block_group, bh); - if (buffer_verified(bh)) - return bh; + err = ext4_validate_block_bitmap(sb, desc, block_group, bh); + if (err) + goto out; + return bh; +out: put_bh(bh); - return NULL; + return ERR_PTR(err); } /* Returns 0 on success, 1 on error */ @@ -485,32 +492,32 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, return 0; desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) - return 1; + return -EFSCORRUPTED; wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ext4_error(sb, "Cannot read block bitmap - " "block_group = %u, block_bitmap = %llu", block_group, (unsigned long long) bh->b_blocknr); - return 1; + return -EIO; } clear_buffer_new(bh); /* Panic or remount fs read-only if block bitmap is invalid */ - ext4_validate_block_bitmap(sb, desc, block_group, bh); - /* ...but check for error just in case errors=continue. */ - return !buffer_verified(bh); + return ext4_validate_block_bitmap(sb, desc, block_group, bh); } struct buffer_head * ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) { struct buffer_head *bh; + int err; bh = ext4_read_block_bitmap_nowait(sb, block_group); - if (!bh) - return NULL; - if (ext4_wait_block_bitmap(sb, block_group, bh)) { + if (IS_ERR(bh)) + return bh; + err = ext4_wait_block_bitmap(sb, block_group, bh); + if (err) { put_bh(bh); - return NULL; + return ERR_PTR(err); } return bh; } @@ -681,8 +688,10 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) desc_count += ext4_free_group_clusters(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_block_bitmap(sb, i); - if (bitmap_bh == NULL) + if (IS_ERR(bitmap_bh)) { + bitmap_bh = NULL; continue; + } x = ext4_count_free(bitmap_bh->b_data, EXT4_CLUSTERS_PER_GROUP(sb) / 8); @@ -740,14 +749,13 @@ int ext4_bg_has_super(struct super_block *sb, ext4_group_t group) if (group == 0) return 1; - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_SPARSE_SUPER2)) { + if (ext4_has_feature_sparse_super2(sb)) { if (group == le32_to_cpu(es->s_backup_bgs[0]) || group == le32_to_cpu(es->s_backup_bgs[1])) return 1; return 0; } - if ((group <= 1) || !EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) + if ((group <= 1) || !ext4_has_feature_sparse_super(sb)) return 1; if (!(group & 1)) return 0; @@ -776,7 +784,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, if (!ext4_bg_has_super(sb, group)) return 0; - if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG)) + if (ext4_has_feature_meta_bg(sb)) return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); else return EXT4_SB(sb)->s_gdb_count; @@ -797,8 +805,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || - metagroup < first_meta_bg) + if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg) return ext4_bg_num_gdb_nometa(sb, group); return ext4_bg_num_gdb_meta(sb,group); @@ -818,7 +825,7 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb, /* Check for superblock and gdt backups in this group */ num = ext4_bg_has_super(sb, block_group); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || + if (!ext4_has_feature_meta_bg(sb) || block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * sbi->s_desc_per_block) { if (num) { diff --git a/kernel/fs/ext4/block_validity.c b/kernel/fs/ext4/block_validity.c index 3522340c7..02ddec6d8 100644 --- a/kernel/fs/ext4/block_validity.c +++ b/kernel/fs/ext4/block_validity.c @@ -234,7 +234,7 @@ int ext4_check_blockref(const char *function, unsigned int line, es->s_last_error_block = cpu_to_le64(blk); ext4_error_inode(inode, function, line, blk, "invalid block"); - return -EIO; + return -EFSCORRUPTED; } } return 0; diff --git a/kernel/fs/ext4/crypto.c b/kernel/fs/ext4/crypto.c index 8ff15273a..1a0835073 100644 --- a/kernel/fs/ext4/crypto.c +++ b/kernel/fs/ext4/crypto.c @@ -55,6 +55,9 @@ static mempool_t *ext4_bounce_page_pool; static LIST_HEAD(ext4_free_crypto_ctxs); static DEFINE_SPINLOCK(ext4_crypto_ctx_lock); +static struct kmem_cache *ext4_crypto_ctx_cachep; +struct kmem_cache *ext4_crypt_info_cachep; + /** * ext4_release_crypto_ctx() - Releases an encryption context * @ctx: The encryption context to release. @@ -68,18 +71,12 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx) { unsigned long flags; - if (ctx->bounce_page) { - if (ctx->flags & EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) - __free_page(ctx->bounce_page); - else - mempool_free(ctx->bounce_page, ext4_bounce_page_pool); - ctx->bounce_page = NULL; - } - ctx->control_page = NULL; + if (ctx->flags & EXT4_WRITE_PATH_FL && ctx->w.bounce_page) + mempool_free(ctx->w.bounce_page, ext4_bounce_page_pool); + ctx->w.bounce_page = NULL; + ctx->w.control_page = NULL; if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) { - if (ctx->tfm) - crypto_free_tfm(ctx->tfm); - kfree(ctx); + kmem_cache_free(ext4_crypto_ctx_cachep, ctx); } else { spin_lock_irqsave(&ext4_crypto_ctx_lock, flags); list_add(&ctx->free_list, &ext4_free_crypto_ctxs); @@ -88,23 +85,6 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx) } /** - * ext4_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context - * @mask: The allocation mask. - * - * Return: An allocated and initialized encryption context on success. An error - * value or NULL otherwise. - */ -static struct ext4_crypto_ctx *ext4_alloc_and_init_crypto_ctx(gfp_t mask) -{ - struct ext4_crypto_ctx *ctx = kzalloc(sizeof(struct ext4_crypto_ctx), - mask); - - if (!ctx) - return ERR_PTR(-ENOMEM); - return ctx; -} - -/** * ext4_get_crypto_ctx() - Gets an encryption context * @inode: The inode for which we are doing the crypto * @@ -118,10 +98,10 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode) struct ext4_crypto_ctx *ctx = NULL; int res = 0; unsigned long flags; - struct ext4_encryption_key *key = &EXT4_I(inode)->i_encryption_key; + struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - if (!ext4_read_workqueue) - ext4_init_crypto(); + if (ci == NULL) + return ERR_PTR(-ENOKEY); /* * We first try getting the ctx from a free list because in @@ -140,50 +120,16 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode) list_del(&ctx->free_list); spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags); if (!ctx) { - ctx = ext4_alloc_and_init_crypto_ctx(GFP_NOFS); - if (IS_ERR(ctx)) { - res = PTR_ERR(ctx); + ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS); + if (!ctx) { + res = -ENOMEM; goto out; } ctx->flags |= EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL; } else { ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL; } - - /* Allocate a new Crypto API context if we don't already have - * one or if it isn't the right mode. */ - BUG_ON(key->mode == EXT4_ENCRYPTION_MODE_INVALID); - if (ctx->tfm && (ctx->mode != key->mode)) { - crypto_free_tfm(ctx->tfm); - ctx->tfm = NULL; - ctx->mode = EXT4_ENCRYPTION_MODE_INVALID; - } - if (!ctx->tfm) { - switch (key->mode) { - case EXT4_ENCRYPTION_MODE_AES_256_XTS: - ctx->tfm = crypto_ablkcipher_tfm( - crypto_alloc_ablkcipher("xts(aes)", 0, 0)); - break; - case EXT4_ENCRYPTION_MODE_AES_256_GCM: - /* TODO(mhalcrow): AEAD w/ gcm(aes); - * crypto_aead_setauthsize() */ - ctx->tfm = ERR_PTR(-ENOTSUPP); - break; - default: - BUG(); - } - if (IS_ERR_OR_NULL(ctx->tfm)) { - res = PTR_ERR(ctx->tfm); - ctx->tfm = NULL; - goto out; - } - ctx->mode = key->mode; - } - BUG_ON(key->size != ext4_encryption_key_size(key->mode)); - - /* There shouldn't be a bounce page attached to the crypto - * context at this point. */ - BUG_ON(ctx->bounce_page); + ctx->flags &= ~EXT4_WRITE_PATH_FL; out: if (res) { @@ -204,20 +150,8 @@ void ext4_exit_crypto(void) { struct ext4_crypto_ctx *pos, *n; - list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) { - if (pos->bounce_page) { - if (pos->flags & - EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) { - __free_page(pos->bounce_page); - } else { - mempool_free(pos->bounce_page, - ext4_bounce_page_pool); - } - } - if (pos->tfm) - crypto_free_tfm(pos->tfm); - kfree(pos); - } + list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) + kmem_cache_free(ext4_crypto_ctx_cachep, pos); INIT_LIST_HEAD(&ext4_free_crypto_ctxs); if (ext4_bounce_page_pool) mempool_destroy(ext4_bounce_page_pool); @@ -225,6 +159,12 @@ void ext4_exit_crypto(void) if (ext4_read_workqueue) destroy_workqueue(ext4_read_workqueue); ext4_read_workqueue = NULL; + if (ext4_crypto_ctx_cachep) + kmem_cache_destroy(ext4_crypto_ctx_cachep); + ext4_crypto_ctx_cachep = NULL; + if (ext4_crypt_info_cachep) + kmem_cache_destroy(ext4_crypt_info_cachep); + ext4_crypt_info_cachep = NULL; } /** @@ -237,23 +177,31 @@ void ext4_exit_crypto(void) */ int ext4_init_crypto(void) { - int i, res; + int i, res = -ENOMEM; mutex_lock(&crypto_init); if (ext4_read_workqueue) goto already_initialized; ext4_read_workqueue = alloc_workqueue("ext4_crypto", WQ_HIGHPRI, 0); - if (!ext4_read_workqueue) { - res = -ENOMEM; + if (!ext4_read_workqueue) + goto fail; + + ext4_crypto_ctx_cachep = KMEM_CACHE(ext4_crypto_ctx, + SLAB_RECLAIM_ACCOUNT); + if (!ext4_crypto_ctx_cachep) + goto fail; + + ext4_crypt_info_cachep = KMEM_CACHE(ext4_crypt_info, + SLAB_RECLAIM_ACCOUNT); + if (!ext4_crypt_info_cachep) goto fail; - } for (i = 0; i < num_prealloc_crypto_ctxs; i++) { struct ext4_crypto_ctx *ctx; - ctx = ext4_alloc_and_init_crypto_ctx(GFP_KERNEL); - if (IS_ERR(ctx)) { - res = PTR_ERR(ctx); + ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS); + if (!ctx) { + res = -ENOMEM; goto fail; } list_add(&ctx->free_list, &ext4_free_crypto_ctxs); @@ -305,8 +253,7 @@ typedef enum { EXT4_ENCRYPT, } ext4_direction_t; -static int ext4_page_crypto(struct ext4_crypto_ctx *ctx, - struct inode *inode, +static int ext4_page_crypto(struct inode *inode, ext4_direction_t rw, pgoff_t index, struct page *src_page, @@ -317,32 +264,11 @@ static int ext4_page_crypto(struct ext4_crypto_ctx *ctx, struct ablkcipher_request *req = NULL; DECLARE_EXT4_COMPLETION_RESULT(ecr); struct scatterlist dst, src; - struct ext4_inode_info *ei = EXT4_I(inode); - struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm); + struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; int res = 0; - BUG_ON(!ctx->tfm); - BUG_ON(ctx->mode != ei->i_encryption_key.mode); - - if (ctx->mode != EXT4_ENCRYPTION_MODE_AES_256_XTS) { - printk_ratelimited(KERN_ERR - "%s: unsupported crypto algorithm: %d\n", - __func__, ctx->mode); - return -ENOTSUPP; - } - - crypto_ablkcipher_clear_flags(atfm, ~0); - crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY); - - res = crypto_ablkcipher_setkey(atfm, ei->i_encryption_key.raw, - ei->i_encryption_key.size); - if (res) { - printk_ratelimited(KERN_ERR - "%s: crypto_ablkcipher_setkey() failed\n", - __func__); - return res; - } - req = ablkcipher_request_alloc(atfm, GFP_NOFS); + req = ablkcipher_request_alloc(tfm, GFP_NOFS); if (!req) { printk_ratelimited(KERN_ERR "%s: crypto_request_alloc() failed\n", @@ -369,7 +295,6 @@ static int ext4_page_crypto(struct ext4_crypto_ctx *ctx, else res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } @@ -384,6 +309,15 @@ static int ext4_page_crypto(struct ext4_crypto_ctx *ctx, return 0; } +static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx) +{ + ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, GFP_NOWAIT); + if (ctx->w.bounce_page == NULL) + return ERR_PTR(-ENOMEM); + ctx->flags |= EXT4_WRITE_PATH_FL; + return ctx->w.bounce_page; +} + /** * ext4_encrypt() - Encrypts a page * @inode: The inode for which the encryption should take place @@ -413,27 +347,17 @@ struct page *ext4_encrypt(struct inode *inode, return (struct page *) ctx; /* The encryption operation will require a bounce page. */ - ciphertext_page = alloc_page(GFP_NOFS); - if (!ciphertext_page) { - /* This is a potential bottleneck, but at least we'll have - * forward progress. */ - ciphertext_page = mempool_alloc(ext4_bounce_page_pool, - GFP_NOFS); - if (WARN_ON_ONCE(!ciphertext_page)) { - ciphertext_page = mempool_alloc(ext4_bounce_page_pool, - GFP_NOFS | __GFP_WAIT); - } - ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; - } else { - ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; - } - ctx->bounce_page = ciphertext_page; - ctx->control_page = plaintext_page; - err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, plaintext_page->index, + ciphertext_page = alloc_bounce_page(ctx); + if (IS_ERR(ciphertext_page)) + goto errout; + ctx->w.control_page = plaintext_page; + err = ext4_page_crypto(inode, EXT4_ENCRYPT, plaintext_page->index, plaintext_page, ciphertext_page); if (err) { + ciphertext_page = ERR_PTR(err); + errout: ext4_release_crypto_ctx(ctx); - return ERR_PTR(err); + return ciphertext_page; } SetPagePrivate(ciphertext_page); set_page_private(ciphertext_page, (unsigned long)ctx); @@ -452,40 +376,29 @@ struct page *ext4_encrypt(struct inode *inode, * * Return: Zero on success, non-zero otherwise. */ -int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page) +int ext4_decrypt(struct page *page) { BUG_ON(!PageLocked(page)); - return ext4_page_crypto(ctx, page->mapping->host, + return ext4_page_crypto(page->mapping->host, EXT4_DECRYPT, page->index, page, page); } -/* - * Convenience function which takes care of allocating and - * deallocating the encryption context - */ -int ext4_decrypt_one(struct inode *inode, struct page *page) -{ - int ret; - - struct ext4_crypto_ctx *ctx = ext4_get_crypto_ctx(inode); - - if (!ctx) - return -ENOMEM; - ret = ext4_decrypt(ctx, page); - ext4_release_crypto_ctx(ctx); - return ret; -} - int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) { struct ext4_crypto_ctx *ctx; struct page *ciphertext_page = NULL; struct bio *bio; - ext4_lblk_t lblk = ex->ee_block; + ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); ext4_fsblk_t pblk = ext4_ext_pblock(ex); unsigned int len = ext4_ext_get_actual_len(ex); - int err = 0; + int ret, err = 0; + +#if 0 + ext4_msg(inode->i_sb, KERN_CRIT, + "ext4_encrypted_zeroout ino %lu lblk %u len %u", + (unsigned long) inode->i_ino, lblk, len); +#endif BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); @@ -493,24 +406,14 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) if (IS_ERR(ctx)) return PTR_ERR(ctx); - ciphertext_page = alloc_page(GFP_NOFS); - if (!ciphertext_page) { - /* This is a potential bottleneck, but at least we'll have - * forward progress. */ - ciphertext_page = mempool_alloc(ext4_bounce_page_pool, - GFP_NOFS); - if (WARN_ON_ONCE(!ciphertext_page)) { - ciphertext_page = mempool_alloc(ext4_bounce_page_pool, - GFP_NOFS | __GFP_WAIT); - } - ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; - } else { - ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; + ciphertext_page = alloc_bounce_page(ctx); + if (IS_ERR(ciphertext_page)) { + err = PTR_ERR(ciphertext_page); + goto errout; } - ctx->bounce_page = ciphertext_page; while (len--) { - err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, lblk, + err = ext4_page_crypto(inode, EXT4_ENCRYPT, lblk, ZERO_PAGE(0), ciphertext_page); if (err) goto errout; @@ -521,16 +424,26 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) goto errout; } bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_iter.bi_sector = pblk; - err = bio_add_page(bio, ciphertext_page, + bio->bi_iter.bi_sector = + pblk << (inode->i_sb->s_blocksize_bits - 9); + ret = bio_add_page(bio, ciphertext_page, inode->i_sb->s_blocksize, 0); - if (err) { + if (ret != inode->i_sb->s_blocksize) { + /* should never happen! */ + ext4_msg(inode->i_sb, KERN_ERR, + "bio_add_page failed: %d", ret); + WARN_ON(1); bio_put(bio); + err = -EIO; goto errout; } err = submit_bio_wait(WRITE, bio); + if ((err == 0) && bio->bi_error) + err = -EIO; + bio_put(bio); if (err) goto errout; + lblk++; pblk++; } err = 0; errout: diff --git a/kernel/fs/ext4/crypto_fname.c b/kernel/fs/ext4/crypto_fname.c index fded02f72..2fbef8a14 100644 --- a/kernel/fs/ext4/crypto_fname.c +++ b/kernel/fs/ext4/crypto_fname.c @@ -19,7 +19,6 @@ #include <linux/gfp.h> #include <linux/kernel.h> #include <linux/key.h> -#include <linux/key.h> #include <linux/list.h> #include <linux/mempool.h> #include <linux/random.h> @@ -48,6 +47,12 @@ bool ext4_valid_filenames_enc_mode(uint32_t mode) return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS); } +static unsigned max_name_len(struct inode *inode) +{ + return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize : + EXT4_NAME_LEN; +} + /** * ext4_fname_encrypt() - * @@ -55,43 +60,52 @@ bool ext4_valid_filenames_enc_mode(uint32_t mode) * ciphertext. Errors are returned as negative numbers. We trust the caller to * allocate sufficient memory to oname string. */ -static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx, +static int ext4_fname_encrypt(struct inode *inode, const struct qstr *iname, struct ext4_str *oname) { u32 ciphertext_len; struct ablkcipher_request *req = NULL; DECLARE_EXT4_COMPLETION_RESULT(ecr); - struct crypto_ablkcipher *tfm = ctx->ctfm; + struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; int res = 0; char iv[EXT4_CRYPTO_BLOCK_SIZE]; - struct scatterlist sg[1]; - int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK); - char *workbuf; + struct scatterlist src_sg, dst_sg; + int padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK); + char *workbuf, buf[32], *alloc_buf = NULL; + unsigned lim = max_name_len(inode); - if (iname->len <= 0 || iname->len > ctx->lim) + if (iname->len <= 0 || iname->len > lim) return -EIO; ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ? EXT4_CRYPTO_BLOCK_SIZE : iname->len; ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding); - ciphertext_len = (ciphertext_len > ctx->lim) - ? ctx->lim : ciphertext_len; + ciphertext_len = (ciphertext_len > lim) + ? lim : ciphertext_len; + + if (ciphertext_len <= sizeof(buf)) { + workbuf = buf; + } else { + alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); + if (!alloc_buf) + return -ENOMEM; + workbuf = alloc_buf; + } /* Allocate request */ req = ablkcipher_request_alloc(tfm, GFP_NOFS); if (!req) { printk_ratelimited( KERN_ERR "%s: crypto_request_alloc() failed\n", __func__); + kfree(alloc_buf); return -ENOMEM; } ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, ext4_dir_crypt_complete, &ecr); - /* Map the workpage */ - workbuf = kmap(ctx->workpage); - /* Copy the input */ memcpy(workbuf, iname->name, iname->len); if (iname->len < ciphertext_len) @@ -101,21 +115,15 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx, memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE); /* Create encryption request */ - sg_init_table(sg, 1); - sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0); - ablkcipher_request_set_crypt(req, sg, sg, ciphertext_len, iv); + sg_init_one(&src_sg, workbuf, ciphertext_len); + sg_init_one(&dst_sg, oname->name, ciphertext_len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } - if (res >= 0) { - /* Copy the result to output */ - memcpy(oname->name, workbuf, ciphertext_len); - res = ciphertext_len; - } - kunmap(ctx->workpage); + kfree(alloc_buf); ablkcipher_request_free(req); if (res < 0) { printk_ratelimited( @@ -132,20 +140,21 @@ static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx, * Errors are returned as negative numbers. * We trust the caller to allocate sufficient memory to oname string. */ -static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx, +static int ext4_fname_decrypt(struct inode *inode, const struct ext4_str *iname, struct ext4_str *oname) { struct ext4_str tmp_in[2], tmp_out[1]; struct ablkcipher_request *req = NULL; DECLARE_EXT4_COMPLETION_RESULT(ecr); - struct scatterlist sg[1]; - struct crypto_ablkcipher *tfm = ctx->ctfm; + struct scatterlist src_sg, dst_sg; + struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; + struct crypto_ablkcipher *tfm = ci->ci_ctfm; int res = 0; char iv[EXT4_CRYPTO_BLOCK_SIZE]; - char *workbuf; + unsigned lim = max_name_len(inode); - if (iname->len <= 0 || iname->len > ctx->lim) + if (iname->len <= 0 || iname->len > lim) return -EIO; tmp_in[0].name = iname->name; @@ -163,31 +172,18 @@ static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, ext4_dir_crypt_complete, &ecr); - /* Map the workpage */ - workbuf = kmap(ctx->workpage); - - /* Copy the input */ - memcpy(workbuf, iname->name, iname->len); - /* Initialize IV */ memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE); /* Create encryption request */ - sg_init_table(sg, 1); - sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0); - ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv); + sg_init_one(&src_sg, iname->name, iname->len); + sg_init_one(&dst_sg, oname->name, oname->len); + ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); res = crypto_ablkcipher_decrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } - if (res >= 0) { - /* Copy the result to output */ - memcpy(oname->name, workbuf, iname->len); - res = iname->len; - } - kunmap(ctx->workpage); ablkcipher_request_free(req); if (res < 0) { printk_ratelimited( @@ -254,207 +250,6 @@ static int digest_decode(const char *src, int len, char *dst) } /** - * ext4_free_fname_crypto_ctx() - - * - * Frees up a crypto context. - */ -void ext4_free_fname_crypto_ctx(struct ext4_fname_crypto_ctx *ctx) -{ - if (ctx == NULL || IS_ERR(ctx)) - return; - - if (ctx->ctfm && !IS_ERR(ctx->ctfm)) - crypto_free_ablkcipher(ctx->ctfm); - if (ctx->htfm && !IS_ERR(ctx->htfm)) - crypto_free_hash(ctx->htfm); - if (ctx->workpage && !IS_ERR(ctx->workpage)) - __free_page(ctx->workpage); - kfree(ctx); -} - -/** - * ext4_put_fname_crypto_ctx() - - * - * Return: The crypto context onto free list. If the free list is above a - * threshold, completely frees up the context, and returns the memory. - * - * TODO: Currently we directly free the crypto context. Eventually we should - * add code it to return to free list. Such an approach will increase - * efficiency of directory lookup. - */ -void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) -{ - if (*ctx == NULL || IS_ERR(*ctx)) - return; - ext4_free_fname_crypto_ctx(*ctx); - *ctx = NULL; -} - -/** - * ext4_search_fname_crypto_ctx() - - */ -static struct ext4_fname_crypto_ctx *ext4_search_fname_crypto_ctx( - const struct ext4_encryption_key *key) -{ - return NULL; -} - -/** - * ext4_alloc_fname_crypto_ctx() - - */ -struct ext4_fname_crypto_ctx *ext4_alloc_fname_crypto_ctx( - const struct ext4_encryption_key *key) -{ - struct ext4_fname_crypto_ctx *ctx; - - ctx = kmalloc(sizeof(struct ext4_fname_crypto_ctx), GFP_NOFS); - if (ctx == NULL) - return ERR_PTR(-ENOMEM); - if (key->mode == EXT4_ENCRYPTION_MODE_INVALID) { - /* This will automatically set key mode to invalid - * As enum for ENCRYPTION_MODE_INVALID is zero */ - memset(&ctx->key, 0, sizeof(ctx->key)); - } else { - memcpy(&ctx->key, key, sizeof(struct ext4_encryption_key)); - } - ctx->has_valid_key = (EXT4_ENCRYPTION_MODE_INVALID == key->mode) - ? 0 : 1; - ctx->ctfm_key_is_ready = 0; - ctx->ctfm = NULL; - ctx->htfm = NULL; - ctx->workpage = NULL; - return ctx; -} - -/** - * ext4_get_fname_crypto_ctx() - - * - * Allocates a free crypto context and initializes it to hold - * the crypto material for the inode. - * - * Return: NULL if not encrypted. Error value on error. Valid pointer otherwise. - */ -struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx( - struct inode *inode, u32 max_ciphertext_len) -{ - struct ext4_fname_crypto_ctx *ctx; - struct ext4_inode_info *ei = EXT4_I(inode); - int res; - - /* Check if the crypto policy is set on the inode */ - res = ext4_encrypted_inode(inode); - if (res == 0) - return NULL; - - if (!ext4_has_encryption_key(inode)) - ext4_generate_encryption_key(inode); - - /* Get a crypto context based on the key. - * A new context is allocated if no context matches the requested key. - */ - ctx = ext4_search_fname_crypto_ctx(&(ei->i_encryption_key)); - if (ctx == NULL) - ctx = ext4_alloc_fname_crypto_ctx(&(ei->i_encryption_key)); - if (IS_ERR(ctx)) - return ctx; - - ctx->flags = ei->i_crypt_policy_flags; - if (ctx->has_valid_key) { - if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) { - printk_once(KERN_WARNING - "ext4: unsupported key mode %d\n", - ctx->key.mode); - return ERR_PTR(-ENOKEY); - } - - /* As a first cut, we will allocate new tfm in every call. - * later, we will keep the tfm around, in case the key gets - * re-used */ - if (ctx->ctfm == NULL) { - ctx->ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))", - 0, 0); - } - if (IS_ERR(ctx->ctfm)) { - res = PTR_ERR(ctx->ctfm); - printk( - KERN_DEBUG "%s: error (%d) allocating crypto tfm\n", - __func__, res); - ctx->ctfm = NULL; - ext4_put_fname_crypto_ctx(&ctx); - return ERR_PTR(res); - } - if (ctx->ctfm == NULL) { - printk( - KERN_DEBUG "%s: could not allocate crypto tfm\n", - __func__); - ext4_put_fname_crypto_ctx(&ctx); - return ERR_PTR(-ENOMEM); - } - if (ctx->workpage == NULL) - ctx->workpage = alloc_page(GFP_NOFS); - if (IS_ERR(ctx->workpage)) { - res = PTR_ERR(ctx->workpage); - printk( - KERN_DEBUG "%s: error (%d) allocating work page\n", - __func__, res); - ctx->workpage = NULL; - ext4_put_fname_crypto_ctx(&ctx); - return ERR_PTR(res); - } - if (ctx->workpage == NULL) { - printk( - KERN_DEBUG "%s: could not allocate work page\n", - __func__); - ext4_put_fname_crypto_ctx(&ctx); - return ERR_PTR(-ENOMEM); - } - ctx->lim = max_ciphertext_len; - crypto_ablkcipher_clear_flags(ctx->ctfm, ~0); - crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctx->ctfm), - CRYPTO_TFM_REQ_WEAK_KEY); - - /* If we are lucky, we will get a context that is already - * set up with the right key. Else, we will have to - * set the key */ - if (!ctx->ctfm_key_is_ready) { - /* Since our crypto objectives for filename encryption - * are pretty weak, - * we directly use the inode master key */ - res = crypto_ablkcipher_setkey(ctx->ctfm, - ctx->key.raw, ctx->key.size); - if (res) { - ext4_put_fname_crypto_ctx(&ctx); - return ERR_PTR(-EIO); - } - ctx->ctfm_key_is_ready = 1; - } else { - /* In the current implementation, key should never be - * marked "ready" for a context that has just been - * allocated. So we should never reach here */ - BUG(); - } - } - if (ctx->htfm == NULL) - ctx->htfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(ctx->htfm)) { - res = PTR_ERR(ctx->htfm); - printk(KERN_DEBUG "%s: error (%d) allocating hash tfm\n", - __func__, res); - ctx->htfm = NULL; - ext4_put_fname_crypto_ctx(&ctx); - return ERR_PTR(res); - } - if (ctx->htfm == NULL) { - printk(KERN_DEBUG "%s: could not allocate hash tfm\n", - __func__); - ext4_put_fname_crypto_ctx(&ctx); - return ERR_PTR(-ENOMEM); - } - - return ctx; -} - -/** * ext4_fname_crypto_round_up() - * * Return: The next multiple of block size @@ -464,44 +259,29 @@ u32 ext4_fname_crypto_round_up(u32 size, u32 blksize) return ((size+blksize-1)/blksize)*blksize; } -/** - * ext4_fname_crypto_namelen_on_disk() - - */ -int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx, - u32 namelen) +unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen) { - u32 ciphertext_len; - int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK); - - if (ctx == NULL) - return -EIO; - if (!(ctx->has_valid_key)) - return -EACCES; - ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ? - EXT4_CRYPTO_BLOCK_SIZE : namelen; - ciphertext_len = ext4_fname_crypto_round_up(ciphertext_len, padding); - ciphertext_len = (ciphertext_len > ctx->lim) - ? ctx->lim : ciphertext_len; - return (int) ciphertext_len; + struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; + int padding = 32; + + if (ci) + padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK); + if (ilen < EXT4_CRYPTO_BLOCK_SIZE) + ilen = EXT4_CRYPTO_BLOCK_SIZE; + return ext4_fname_crypto_round_up(ilen, padding); } -/** - * ext4_fname_crypto_alloc_obuff() - +/* + * ext4_fname_crypto_alloc_buffer() - * * Allocates an output buffer that is sufficient for the crypto operation * specified by the context and the direction. */ -int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx, +int ext4_fname_crypto_alloc_buffer(struct inode *inode, u32 ilen, struct ext4_str *crypto_str) { - unsigned int olen; - int padding = 4 << (ctx->flags & EXT4_POLICY_FLAGS_PAD_MASK); + unsigned int olen = ext4_fname_encrypted_size(inode, ilen); - if (!ctx) - return -EIO; - if (padding < EXT4_CRYPTO_BLOCK_SIZE) - padding = EXT4_CRYPTO_BLOCK_SIZE; - olen = ext4_fname_crypto_round_up(ilen, padding); crypto_str->len = olen; if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2; @@ -529,7 +309,7 @@ void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str) /** * ext4_fname_disk_to_usr() - converts a filename from disk space to user space */ -int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, +int _ext4_fname_disk_to_usr(struct inode *inode, struct dx_hash_info *hinfo, const struct ext4_str *iname, struct ext4_str *oname) @@ -537,8 +317,6 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, char buf[24]; int ret; - if (ctx == NULL) - return -EIO; if (iname->len < 3) { /*Check for . and .. */ if (iname->name[0] == '.' && iname->name[iname->len-1] == '.') { @@ -548,8 +326,12 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, return oname->len; } } - if (ctx->has_valid_key) - return ext4_fname_decrypt(ctx, iname, oname); + if (iname->len < EXT4_CRYPTO_BLOCK_SIZE) { + EXT4_ERROR_INODE(inode, "encrypted inode too small"); + return -EUCLEAN; + } + if (EXT4_I(inode)->i_crypt_info) + return ext4_fname_decrypt(inode, iname, oname); if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) { ret = digest_encode(iname->name, iname->len, oname->name); @@ -568,7 +350,7 @@ int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, return ret + 1; } -int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, +int ext4_fname_disk_to_usr(struct inode *inode, struct dx_hash_info *hinfo, const struct ext4_dir_entry_2 *de, struct ext4_str *oname) @@ -576,21 +358,20 @@ int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, struct ext4_str iname = {.name = (unsigned char *) de->name, .len = de->name_len }; - return _ext4_fname_disk_to_usr(ctx, hinfo, &iname, oname); + return _ext4_fname_disk_to_usr(inode, hinfo, &iname, oname); } /** * ext4_fname_usr_to_disk() - converts a filename from user space to disk space */ -int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx, +int ext4_fname_usr_to_disk(struct inode *inode, const struct qstr *iname, struct ext4_str *oname) { int res; + struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; - if (ctx == NULL) - return -EIO; if (iname->len < 3) { /*Check for . and .. */ if (iname->name[0] == '.' && @@ -601,8 +382,8 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx, return oname->len; } } - if (ctx->has_valid_key) { - res = ext4_fname_encrypt(ctx, iname, oname); + if (ci) { + res = ext4_fname_encrypt(inode, iname, oname); return res; } /* Without a proper key, a user is not allowed to modify the filenames @@ -611,109 +392,79 @@ int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx, return -EACCES; } -/* - * Calculate the htree hash from a filename from user space - */ -int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx, - const struct qstr *iname, - struct dx_hash_info *hinfo) +int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct ext4_filename *fname) { - struct ext4_str tmp; - int ret = 0; - char buf[EXT4_FNAME_CRYPTO_DIGEST_SIZE+1]; + struct ext4_crypt_info *ci; + int ret = 0, bigname = 0; + + memset(fname, 0, sizeof(struct ext4_filename)); + fname->usr_fname = iname; - if (!ctx || + if (!ext4_encrypted_inode(dir) || ((iname->name[0] == '.') && ((iname->len == 1) || ((iname->name[1] == '.') && (iname->len == 2))))) { - ext4fs_dirhash(iname->name, iname->len, hinfo); + fname->disk_name.name = (unsigned char *) iname->name; + fname->disk_name.len = iname->len; return 0; } - - if (!ctx->has_valid_key && iname->name[0] == '_') { - if (iname->len != 33) - return -ENOENT; - ret = digest_decode(iname->name+1, iname->len, buf); - if (ret != 24) - return -ENOENT; - memcpy(&hinfo->hash, buf, 4); - memcpy(&hinfo->minor_hash, buf + 4, 4); + ret = ext4_get_encryption_info(dir); + if (ret) + return ret; + ci = EXT4_I(dir)->i_crypt_info; + if (ci) { + ret = ext4_fname_crypto_alloc_buffer(dir, iname->len, + &fname->crypto_buf); + if (ret < 0) + return ret; + ret = ext4_fname_encrypt(dir, iname, &fname->crypto_buf); + if (ret < 0) + goto errout; + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; return 0; } + if (!lookup) + return -EACCES; - if (!ctx->has_valid_key && iname->name[0] != '_') { - if (iname->len > 43) - return -ENOENT; - ret = digest_decode(iname->name, iname->len, buf); - ext4fs_dirhash(buf, ret, hinfo); - return 0; + /* We don't have the key and we are doing a lookup; decode the + * user-supplied name + */ + if (iname->name[0] == '_') + bigname = 1; + if ((bigname && (iname->len != 33)) || + (!bigname && (iname->len > 43))) + return -ENOENT; + + fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); + if (fname->crypto_buf.name == NULL) + return -ENOMEM; + ret = digest_decode(iname->name + bigname, iname->len - bigname, + fname->crypto_buf.name); + if (ret < 0) { + ret = -ENOENT; + goto errout; } - - /* First encrypt the plaintext name */ - ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp); - if (ret < 0) - return ret; - - ret = ext4_fname_encrypt(ctx, iname, &tmp); - if (ret >= 0) { - ext4fs_dirhash(tmp.name, tmp.len, hinfo); - ret = 0; + fname->crypto_buf.len = ret; + if (bigname) { + memcpy(&fname->hinfo.hash, fname->crypto_buf.name, 4); + memcpy(&fname->hinfo.minor_hash, fname->crypto_buf.name + 4, 4); + } else { + fname->disk_name.name = fname->crypto_buf.name; + fname->disk_name.len = fname->crypto_buf.len; } - - ext4_fname_crypto_free_buffer(&tmp); + return 0; +errout: + kfree(fname->crypto_buf.name); + fname->crypto_buf.name = NULL; return ret; } -int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr, - int len, const char * const name, - struct ext4_dir_entry_2 *de) +void ext4_fname_free_filename(struct ext4_filename *fname) { - int ret = -ENOENT; - int bigname = (*name == '_'); - - if (ctx->has_valid_key) { - if (cstr->name == NULL) { - struct qstr istr; - - ret = ext4_fname_crypto_alloc_buffer(ctx, len, cstr); - if (ret < 0) - goto errout; - istr.name = name; - istr.len = len; - ret = ext4_fname_encrypt(ctx, &istr, cstr); - if (ret < 0) - goto errout; - } - } else { - if (cstr->name == NULL) { - cstr->name = kmalloc(32, GFP_KERNEL); - if (cstr->name == NULL) - return -ENOMEM; - if ((bigname && (len != 33)) || - (!bigname && (len > 43))) - goto errout; - ret = digest_decode(name+bigname, len-bigname, - cstr->name); - if (ret < 0) { - ret = -ENOENT; - goto errout; - } - cstr->len = ret; - } - if (bigname) { - if (de->name_len < 16) - return 0; - ret = memcmp(de->name + de->name_len - 16, - cstr->name + 8, 16); - return (ret == 0) ? 1 : 0; - } - } - if (de->name_len != cstr->len) - return 0; - ret = memcmp(de->name, cstr->name, cstr->len); - return (ret == 0) ? 1 : 0; -errout: - kfree(cstr->name); - cstr->name = NULL; - return ret; + kfree(fname->crypto_buf.name); + fname->crypto_buf.name = NULL; + fname->usr_fname = NULL; + fname->disk_name.name = NULL; } diff --git a/kernel/fs/ext4/crypto_key.c b/kernel/fs/ext4/crypto_key.c index 52170d0b7..9a16d1e75 100644 --- a/kernel/fs/ext4/crypto_key.c +++ b/kernel/fs/ext4/crypto_key.c @@ -30,7 +30,7 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc) /** * ext4_derive_key_aes() - Derive a key using AES-128-ECB - * @deriving_key: Encryption key used for derivatio. + * @deriving_key: Encryption key used for derivation. * @source_key: Source key to which to apply derivation. * @derived_key: Derived key. * @@ -71,7 +71,6 @@ static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE], EXT4_AES_256_XTS_KEY_SIZE, NULL); res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } @@ -84,46 +83,115 @@ out: return res; } -/** - * ext4_generate_encryption_key() - generates an encryption key - * @inode: The inode to generate the encryption key for. - */ -int ext4_generate_encryption_key(struct inode *inode) +void ext4_free_crypt_info(struct ext4_crypt_info *ci) +{ + if (!ci) + return; + + if (ci->ci_keyring_key) + key_put(ci->ci_keyring_key); + crypto_free_ablkcipher(ci->ci_ctfm); + kmem_cache_free(ext4_crypt_info_cachep, ci); +} + +void ext4_free_encryption_info(struct inode *inode, + struct ext4_crypt_info *ci) { struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_encryption_key *crypt_key = &ei->i_encryption_key; + struct ext4_crypt_info *prev; + + if (ci == NULL) + ci = ACCESS_ONCE(ei->i_crypt_info); + if (ci == NULL) + return; + prev = cmpxchg(&ei->i_crypt_info, ci, NULL); + if (prev != ci) + return; + + ext4_free_crypt_info(ci); +} + +int _ext4_get_encryption_info(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_crypt_info *crypt_info; char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE + (EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1]; struct key *keyring_key = NULL; struct ext4_encryption_key *master_key; struct ext4_encryption_context ctx; - struct user_key_payload *ukp; + const struct user_key_payload *ukp; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - &ctx, sizeof(ctx)); + struct crypto_ablkcipher *ctfm; + const char *cipher_str; + char raw_key[EXT4_MAX_KEY_SIZE]; + char mode; + int res; - if (res != sizeof(ctx)) { - if (res > 0) - res = -EINVAL; - goto out; + if (!ext4_read_workqueue) { + res = ext4_init_crypto(); + if (res) + return res; } + +retry: + crypt_info = ACCESS_ONCE(ei->i_crypt_info); + if (crypt_info) { + if (!crypt_info->ci_keyring_key || + key_validate(crypt_info->ci_keyring_key) == 0) + return 0; + ext4_free_encryption_info(inode, crypt_info); + goto retry; + } + + res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, + &ctx, sizeof(ctx)); + if (res < 0) { + if (!DUMMY_ENCRYPTION_ENABLED(sbi)) + return res; + ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS; + ctx.filenames_encryption_mode = + EXT4_ENCRYPTION_MODE_AES_256_CTS; + ctx.flags = 0; + } else if (res != sizeof(ctx)) + return -EINVAL; res = 0; - ei->i_crypt_policy_flags = ctx.flags; + crypt_info = kmem_cache_alloc(ext4_crypt_info_cachep, GFP_KERNEL); + if (!crypt_info) + return -ENOMEM; + + crypt_info->ci_flags = ctx.flags; + crypt_info->ci_data_mode = ctx.contents_encryption_mode; + crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; + crypt_info->ci_ctfm = NULL; + crypt_info->ci_keyring_key = NULL; + memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, + sizeof(crypt_info->ci_master_key)); if (S_ISREG(inode->i_mode)) - crypt_key->mode = ctx.contents_encryption_mode; + mode = crypt_info->ci_data_mode; else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - crypt_key->mode = ctx.filenames_encryption_mode; - else { - printk(KERN_ERR "ext4 crypto: Unsupported inode type.\n"); + mode = crypt_info->ci_filename_mode; + else BUG(); + switch (mode) { + case EXT4_ENCRYPTION_MODE_AES_256_XTS: + cipher_str = "xts(aes)"; + break; + case EXT4_ENCRYPTION_MODE_AES_256_CTS: + cipher_str = "cts(cbc(aes))"; + break; + default: + printk_once(KERN_WARNING + "ext4: unsupported key mode %d (ino %u)\n", + mode, (unsigned) inode->i_ino); + res = -ENOKEY; + goto out; } - crypt_key->size = ext4_encryption_key_size(crypt_key->mode); - BUG_ON(!crypt_key->size); if (DUMMY_ENCRYPTION_ENABLED(sbi)) { - memset(crypt_key->raw, 0x42, EXT4_AES_256_XTS_KEY_SIZE); - goto out; + memset(raw_key, 0x42, EXT4_AES_256_XTS_KEY_SIZE); + goto got_key; } memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX, EXT4_KEY_DESC_PREFIX_SIZE); @@ -138,29 +206,71 @@ int ext4_generate_encryption_key(struct inode *inode) keyring_key = NULL; goto out; } - BUG_ON(keyring_key->type != &key_type_logon); - ukp = ((struct user_key_payload *)keyring_key->payload.data); + crypt_info->ci_keyring_key = keyring_key; + if (keyring_key->type != &key_type_logon) { + printk_once(KERN_WARNING + "ext4: key type must be logon\n"); + res = -ENOKEY; + goto out; + } + down_read(&keyring_key->sem); + ukp = user_key_payload(keyring_key); if (ukp->datalen != sizeof(struct ext4_encryption_key)) { res = -EINVAL; + up_read(&keyring_key->sem); goto out; } master_key = (struct ext4_encryption_key *)ukp->data; BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE != EXT4_KEY_DERIVATION_NONCE_SIZE); - BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE); - res = ext4_derive_key_aes(ctx.nonce, master_key->raw, crypt_key->raw); + if (master_key->size != EXT4_AES_256_XTS_KEY_SIZE) { + printk_once(KERN_WARNING + "ext4: key size incorrect: %d\n", + master_key->size); + res = -ENOKEY; + up_read(&keyring_key->sem); + goto out; + } + res = ext4_derive_key_aes(ctx.nonce, master_key->raw, + raw_key); + up_read(&keyring_key->sem); + if (res) + goto out; +got_key: + ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0); + if (!ctfm || IS_ERR(ctfm)) { + res = ctfm ? PTR_ERR(ctfm) : -ENOMEM; + printk(KERN_DEBUG + "%s: error %d (inode %u) allocating crypto tfm\n", + __func__, res, (unsigned) inode->i_ino); + goto out; + } + crypt_info->ci_ctfm = ctfm; + crypto_ablkcipher_clear_flags(ctfm, ~0); + crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm), + CRYPTO_TFM_REQ_WEAK_KEY); + res = crypto_ablkcipher_setkey(ctfm, raw_key, + ext4_encryption_key_size(mode)); + if (res) + goto out; + memzero_explicit(raw_key, sizeof(raw_key)); + if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) { + ext4_free_crypt_info(crypt_info); + goto retry; + } + return 0; + out: - if (keyring_key) - key_put(keyring_key); - if (res < 0) - crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID; + if (res == -ENOKEY) + res = 0; + ext4_free_crypt_info(crypt_info); + memzero_explicit(raw_key, sizeof(raw_key)); return res; } int ext4_has_encryption_key(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_encryption_key *crypt_key = &ei->i_encryption_key; - return (crypt_key->mode != EXT4_ENCRYPTION_MODE_INVALID); + return (ei->i_crypt_info != NULL); } diff --git a/kernel/fs/ext4/crypto_policy.c b/kernel/fs/ext4/crypto_policy.c index a6d6291ae..ad0506981 100644 --- a/kernel/fs/ext4/crypto_policy.c +++ b/kernel/fs/ext4/crypto_policy.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/types.h> +#include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" @@ -49,7 +50,12 @@ static int ext4_create_encryption_context_from_policy( struct inode *inode, const struct ext4_encryption_policy *policy) { struct ext4_encryption_context ctx; - int res = 0; + handle_t *handle; + int res, res2; + + res = ext4_convert_inline_data(inode); + if (res) + return res; ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1; memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, @@ -74,11 +80,22 @@ static int ext4_create_encryption_context_from_policy( BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE); get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); + handle = ext4_journal_start(inode, EXT4_HT_MISC, + ext4_jbd2_credits_xattr(inode)); + if (IS_ERR(handle)) + return PTR_ERR(handle); res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, sizeof(ctx), 0); - if (!res) + if (!res) { ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); + res = ext4_mark_inode_dirty(handle, inode); + if (res) + EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); + } + res2 = ext4_journal_stop(handle); + if (!res) + res = res2; return res; } @@ -89,6 +106,8 @@ int ext4_process_policy(const struct ext4_encryption_policy *policy, return -EINVAL; if (!ext4_inode_has_encryption_context(inode)) { + if (!S_ISDIR(inode->i_mode)) + return -EINVAL; if (!ext4_empty_dir(inode)) return -ENOTEMPTY; return ext4_create_encryption_context_from_policy(inode, @@ -126,36 +145,39 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy) int ext4_is_child_context_consistent_with_parent(struct inode *parent, struct inode *child) { - struct ext4_encryption_context parent_ctx, child_ctx; + struct ext4_crypt_info *parent_ci, *child_ci; int res; if ((parent == NULL) || (child == NULL)) { pr_err("parent %p child %p\n", parent, child); - BUG_ON(1); + WARN_ON(1); /* Should never happen */ + return 0; } /* no restrictions if the parent directory is not encrypted */ if (!ext4_encrypted_inode(parent)) return 1; - res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - &parent_ctx, sizeof(parent_ctx)); - if (res != sizeof(parent_ctx)) - return 0; /* if the child directory is not encrypted, this is always a problem */ if (!ext4_encrypted_inode(child)) return 0; - res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - &child_ctx, sizeof(child_ctx)); - if (res != sizeof(child_ctx)) + res = ext4_get_encryption_info(parent); + if (res) + return 0; + res = ext4_get_encryption_info(child); + if (res) + return 0; + parent_ci = EXT4_I(parent)->i_crypt_info; + child_ci = EXT4_I(child)->i_crypt_info; + if (!parent_ci && !child_ci) + return 1; + if (!parent_ci || !child_ci) return 0; - return (memcmp(parent_ctx.master_key_descriptor, - child_ctx.master_key_descriptor, + + return (memcmp(parent_ci->ci_master_key, + child_ci->ci_master_key, EXT4_KEY_DESCRIPTOR_SIZE) == 0 && - (parent_ctx.contents_encryption_mode == - child_ctx.contents_encryption_mode) && - (parent_ctx.filenames_encryption_mode == - child_ctx.filenames_encryption_mode)); + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && + (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && + (parent_ci->ci_flags == child_ci->ci_flags)); } /** @@ -168,31 +190,40 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent, int ext4_inherit_context(struct inode *parent, struct inode *child) { struct ext4_encryption_context ctx; - int res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - &ctx, sizeof(ctx)); + struct ext4_crypt_info *ci; + int res; - if (res != sizeof(ctx)) { - if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) { - ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1; - ctx.contents_encryption_mode = - EXT4_ENCRYPTION_MODE_AES_256_XTS; - ctx.filenames_encryption_mode = - EXT4_ENCRYPTION_MODE_AES_256_CTS; - ctx.flags = 0; - memset(ctx.master_key_descriptor, 0x42, - EXT4_KEY_DESCRIPTOR_SIZE); - res = 0; - } else { - goto out; - } + res = ext4_get_encryption_info(parent); + if (res < 0) + return res; + ci = EXT4_I(parent)->i_crypt_info; + if (ci == NULL) + return -ENOKEY; + + ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1; + if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) { + ctx.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS; + ctx.filenames_encryption_mode = + EXT4_ENCRYPTION_MODE_AES_256_CTS; + ctx.flags = 0; + memset(ctx.master_key_descriptor, 0x42, + EXT4_KEY_DESCRIPTOR_SIZE); + res = 0; + } else { + ctx.contents_encryption_mode = ci->ci_data_mode; + ctx.filenames_encryption_mode = ci->ci_filename_mode; + ctx.flags = ci->ci_flags; + memcpy(ctx.master_key_descriptor, ci->ci_master_key, + EXT4_KEY_DESCRIPTOR_SIZE); } get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION, EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, sizeof(ctx), 0); -out: - if (!res) + if (!res) { ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT); + ext4_clear_inode_state(child, EXT4_STATE_MAY_INLINE_DATA); + res = ext4_get_encryption_info(child); + } return res; } diff --git a/kernel/fs/ext4/dir.c b/kernel/fs/ext4/dir.c index 5665d82d2..1d1bca74f 100644 --- a/kernel/fs/ext4/dir.c +++ b/kernel/fs/ext4/dir.c @@ -40,8 +40,7 @@ static int is_dx_dir(struct inode *inode) { struct super_block *sb = inode->i_sb; - if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_COMPAT_DIR_INDEX) && + if (ext4_has_feature_dir_index(inode->i_sb) && ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || ((inode->i_size >> sb->s_blocksize_bits) == 1) || ext4_has_inline_data(inode))) @@ -110,7 +109,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; int dir_has_error = 0; - struct ext4_fname_crypto_ctx *enc_ctx = NULL; struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; if (is_dx_dir(inode)) { @@ -134,16 +132,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) return err; } - enc_ctx = ext4_get_fname_crypto_ctx(inode, EXT4_NAME_LEN); - if (IS_ERR(enc_ctx)) - return PTR_ERR(enc_ctx); - if (enc_ctx) { - err = ext4_fname_crypto_alloc_buffer(enc_ctx, EXT4_NAME_LEN, + if (ext4_encrypted_inode(inode)) { + err = ext4_fname_crypto_alloc_buffer(inode, EXT4_NAME_LEN, &fname_crypto_str); - if (err < 0) { - ext4_put_fname_crypto_ctx(&enc_ctx); + if (err < 0) return err; - } } offset = ctx->pos & (sb->s_blocksize - 1); @@ -239,17 +232,19 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); if (le32_to_cpu(de->inode)) { - if (enc_ctx == NULL) { - /* Directory is not encrypted */ + if (!ext4_encrypted_inode(inode)) { if (!dir_emit(ctx, de->name, de->name_len, le32_to_cpu(de->inode), get_dtype(sb, de->file_type))) goto done; } else { + int save_len = fname_crypto_str.len; + /* Directory is encrypted */ - err = ext4_fname_disk_to_usr(enc_ctx, + err = ext4_fname_disk_to_usr(inode, NULL, de, &fname_crypto_str); + fname_crypto_str.len = save_len; if (err < 0) goto errout; if (!dir_emit(ctx, @@ -272,7 +267,6 @@ done: err = 0; errout: #ifdef CONFIG_EXT4_FS_ENCRYPTION - ext4_put_fname_crypto_ctx(&enc_ctx); ext4_fname_crypto_free_buffer(&fname_crypto_str); #endif brelse(bh); @@ -598,6 +592,13 @@ finished: return 0; } +static int ext4_dir_open(struct inode * inode, struct file * filp) +{ + if (ext4_encrypted_inode(inode)) + return ext4_get_encryption_info(inode) ? -EACCES : 0; + return 0; +} + static int ext4_release_dir(struct inode *inode, struct file *filp) { if (filp->private_data) @@ -619,14 +620,14 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, while ((char *) de < top) { if (ext4_check_dir_entry(dir, NULL, de, bh, buf, buf_size, offset)) - return -EIO; + return -EFSCORRUPTED; nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); de = (struct ext4_dir_entry_2 *)((char *)de + rlen); offset += rlen; } if ((char *) de > top) - return -EIO; + return -EFSCORRUPTED; return 0; } @@ -640,5 +641,6 @@ const struct file_operations ext4_dir_operations = { .compat_ioctl = ext4_compat_ioctl, #endif .fsync = ext4_sync_file, + .open = ext4_dir_open, .release = ext4_release_dir, }; diff --git a/kernel/fs/ext4/ext4.h b/kernel/fs/ext4/ext4.h index 9a83f149a..cc7ca4e87 100644 --- a/kernel/fs/ext4/ext4.h +++ b/kernel/fs/ext4/ext4.h @@ -26,6 +26,7 @@ #include <linux/seqlock.h> #include <linux/mutex.h> #include <linux/timer.h> +#include <linux/version.h> #include <linux/wait.h> #include <linux/blockgroup_lock.h> #include <linux/percpu_counter.h> @@ -69,15 +70,6 @@ #define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif -#define EXT4_ERROR_INODE(inode, fmt, a...) \ - ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a) - -#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \ - ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a) - -#define EXT4_ERROR_FILE(file, block, fmt, a...) \ - ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a) - /* data type for block offset of block group */ typedef int ext4_grpblk_t; @@ -90,6 +82,11 @@ typedef __u32 ext4_lblk_t; /* data type for block group number */ typedef unsigned int ext4_group_t; +enum SHIFT_DIRECTION { + SHIFT_LEFT = 0, + SHIFT_RIGHT, +}; + /* * Flags used in mballoc's allocation_context flags field. * @@ -191,7 +188,7 @@ typedef struct ext4_io_end { } ext4_io_end_t; struct ext4_io_submit { - int io_op; + struct writeback_control *io_wbc; struct bio *io_bio; ext4_io_end_t *io_end; sector_t io_next_block; @@ -378,6 +375,7 @@ struct flex_groups { #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ +#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ @@ -435,6 +433,7 @@ enum { EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ + EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */ EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ }; @@ -479,6 +478,7 @@ static inline void ext4_check_flag_values(void) CHECK_FLAG_VALUE(EA_INODE); CHECK_FLAG_VALUE(EOFBLOCKS); CHECK_FLAG_VALUE(INLINE_DATA); + CHECK_FLAG_VALUE(PROJINHERIT); CHECK_FLAG_VALUE(RESERVED); } @@ -696,6 +696,7 @@ struct ext4_inode { __le32 i_crtime; /* File Creation time */ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ __le32 i_version_hi; /* high 32 bits for 64-bit version */ + __le32 i_projid; /* Project ID */ }; struct move_extent { @@ -727,19 +728,55 @@ struct move_extent { <= (EXT4_GOOD_OLD_INODE_SIZE + \ (einode)->i_extra_isize)) \ +/* + * We use an encoding that preserves the times for extra epoch "00": + * + * extra msb of adjust for signed + * epoch 32-bit 32-bit tv_sec to + * bits time decoded 64-bit tv_sec 64-bit tv_sec valid time range + * 0 0 1 -0x80000000..-0x00000001 0x000000000 1901-12-13..1969-12-31 + * 0 0 0 0x000000000..0x07fffffff 0x000000000 1970-01-01..2038-01-19 + * 0 1 1 0x080000000..0x0ffffffff 0x100000000 2038-01-19..2106-02-07 + * 0 1 0 0x100000000..0x17fffffff 0x100000000 2106-02-07..2174-02-25 + * 1 0 1 0x180000000..0x1ffffffff 0x200000000 2174-02-25..2242-03-16 + * 1 0 0 0x200000000..0x27fffffff 0x200000000 2242-03-16..2310-04-04 + * 1 1 1 0x280000000..0x2ffffffff 0x300000000 2310-04-04..2378-04-22 + * 1 1 0 0x300000000..0x37fffffff 0x300000000 2378-04-22..2446-05-10 + * + * Note that previous versions of the kernel on 64-bit systems would + * incorrectly use extra epoch bits 1,1 for dates between 1901 and + * 1970. e2fsck will correct this, assuming that it is run on the + * affected filesystem before 2242. + */ + static inline __le32 ext4_encode_extra_time(struct timespec *time) { - return cpu_to_le32((sizeof(time->tv_sec) > 4 ? - (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | - ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); + u32 extra = sizeof(time->tv_sec) > 4 ? + ((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0; + return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); } static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) { - if (sizeof(time->tv_sec) > 4) - time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) - << 32; - time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; + if (unlikely(sizeof(time->tv_sec) > 4 && + (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0) + /* Handle legacy encoding of pre-1970 dates with epoch + * bits 1,1. We assume that by kernel version 4.20, + * everyone will have run fsck over the affected + * filesystems to correct the problem. (This + * backwards compatibility may be removed before this + * time, at the discretion of the ext4 developers.) + */ + u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK; + if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0) + extra_bits = 0; + time->tv_sec += extra_bits << 32; +#else + time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; +#endif + } + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; } #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ @@ -911,7 +948,6 @@ struct ext4_inode_info { /* on-disk additional length */ __u16 i_extra_isize; - char i_crypt_policy_flags; /* Indicate the inline data space. */ u16 i_inline_off; @@ -955,7 +991,7 @@ struct ext4_inode_info { #ifdef CONFIG_EXT4_FS_ENCRYPTION /* Encryption params */ - struct ext4_encryption_key i_encryption_key; + struct ext4_crypt_info *i_crypt_info; #endif }; @@ -1024,6 +1060,9 @@ struct ext4_inode_info { #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated file systems */ +#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly + specified journal checksum */ + #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ ~EXT4_MOUNT_##opt #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ @@ -1184,7 +1223,9 @@ struct ext4_super_block { __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */ __u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ __le32 s_lpf_ino; /* Location of the lost+found inode */ - __le32 s_reserved[100]; /* Padding to the end of the block */ + __le32 s_prj_quota_inum; /* inode for tracking project quota */ + __le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */ + __le32 s_reserved[98]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ }; @@ -1374,12 +1415,6 @@ struct ext4_sb_info { struct ratelimit_state s_err_ratelimit_state; struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; - -#ifdef CONFIG_EXT4_FS_ENCRYPTION - /* Encryption */ - uint32_t s_file_encryption_mode; - uint32_t s_dir_encryption_mode; -#endif }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1533,6 +1568,7 @@ static inline int ext4_encrypted_inode(struct inode *inode) * Feature set definitions */ +/* Use the ext4_{has,set,clear}_feature_* helpers; these will be removed */ #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0) #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ @@ -1577,6 +1613,7 @@ static inline int ext4_encrypted_inode(struct inode *inode) */ #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 #define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000 +#define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -1589,11 +1626,99 @@ static inline int ext4_encrypted_inode(struct inode *inode) #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ -#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ +#define EXT4_FEATURE_INCOMPAT_CSUM_SEED 0x2000 #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ #define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000 +#define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \ +static inline bool ext4_has_feature_##name(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_compat & \ + cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname)) != 0); \ +} \ +static inline void ext4_set_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_compat |= \ + cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \ +} \ +static inline void ext4_clear_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_compat &= \ + ~cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \ +} + +#define EXT4_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ +static inline bool ext4_has_feature_##name(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_ro_compat & \ + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname)) != 0); \ +} \ +static inline void ext4_set_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_ro_compat |= \ + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \ +} \ +static inline void ext4_clear_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_ro_compat &= \ + ~cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \ +} + +#define EXT4_FEATURE_INCOMPAT_FUNCS(name, flagname) \ +static inline bool ext4_has_feature_##name(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_incompat & \ + cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname)) != 0); \ +} \ +static inline void ext4_set_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_incompat |= \ + cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \ +} \ +static inline void ext4_clear_feature_##name(struct super_block *sb) \ +{ \ + EXT4_SB(sb)->s_es->s_feature_incompat &= \ + ~cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \ +} + +EXT4_FEATURE_COMPAT_FUNCS(dir_prealloc, DIR_PREALLOC) +EXT4_FEATURE_COMPAT_FUNCS(imagic_inodes, IMAGIC_INODES) +EXT4_FEATURE_COMPAT_FUNCS(journal, HAS_JOURNAL) +EXT4_FEATURE_COMPAT_FUNCS(xattr, EXT_ATTR) +EXT4_FEATURE_COMPAT_FUNCS(resize_inode, RESIZE_INODE) +EXT4_FEATURE_COMPAT_FUNCS(dir_index, DIR_INDEX) +EXT4_FEATURE_COMPAT_FUNCS(sparse_super2, SPARSE_SUPER2) + +EXT4_FEATURE_RO_COMPAT_FUNCS(sparse_super, SPARSE_SUPER) +EXT4_FEATURE_RO_COMPAT_FUNCS(large_file, LARGE_FILE) +EXT4_FEATURE_RO_COMPAT_FUNCS(btree_dir, BTREE_DIR) +EXT4_FEATURE_RO_COMPAT_FUNCS(huge_file, HUGE_FILE) +EXT4_FEATURE_RO_COMPAT_FUNCS(gdt_csum, GDT_CSUM) +EXT4_FEATURE_RO_COMPAT_FUNCS(dir_nlink, DIR_NLINK) +EXT4_FEATURE_RO_COMPAT_FUNCS(extra_isize, EXTRA_ISIZE) +EXT4_FEATURE_RO_COMPAT_FUNCS(quota, QUOTA) +EXT4_FEATURE_RO_COMPAT_FUNCS(bigalloc, BIGALLOC) +EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, METADATA_CSUM) +EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, READONLY) +EXT4_FEATURE_RO_COMPAT_FUNCS(project, PROJECT) + +EXT4_FEATURE_INCOMPAT_FUNCS(compression, COMPRESSION) +EXT4_FEATURE_INCOMPAT_FUNCS(filetype, FILETYPE) +EXT4_FEATURE_INCOMPAT_FUNCS(journal_needs_recovery, RECOVER) +EXT4_FEATURE_INCOMPAT_FUNCS(journal_dev, JOURNAL_DEV) +EXT4_FEATURE_INCOMPAT_FUNCS(meta_bg, META_BG) +EXT4_FEATURE_INCOMPAT_FUNCS(extents, EXTENTS) +EXT4_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) +EXT4_FEATURE_INCOMPAT_FUNCS(mmp, MMP) +EXT4_FEATURE_INCOMPAT_FUNCS(flex_bg, FLEX_BG) +EXT4_FEATURE_INCOMPAT_FUNCS(ea_inode, EA_INODE) +EXT4_FEATURE_INCOMPAT_FUNCS(dirdata, DIRDATA) +EXT4_FEATURE_INCOMPAT_FUNCS(csum_seed, CSUM_SEED) +EXT4_FEATURE_INCOMPAT_FUNCS(largedir, LARGEDIR) +EXT4_FEATURE_INCOMPAT_FUNCS(inline_data, INLINE_DATA) +EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT) + #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ EXT4_FEATURE_INCOMPAT_META_BG) @@ -1609,7 +1734,7 @@ static inline int ext4_encrypted_inode(struct inode *inode) EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) -#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR +#define EXT4_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ EXT4_FEATURE_INCOMPAT_RECOVER| \ EXT4_FEATURE_INCOMPAT_META_BG| \ @@ -1618,7 +1743,8 @@ static inline int ext4_encrypted_inode(struct inode *inode) EXT4_FEATURE_INCOMPAT_FLEX_BG| \ EXT4_FEATURE_INCOMPAT_MMP | \ EXT4_FEATURE_INCOMPAT_INLINE_DATA | \ - EXT4_FEATURE_INCOMPAT_ENCRYPT) + EXT4_FEATURE_INCOMPAT_ENCRYPT | \ + EXT4_FEATURE_INCOMPAT_CSUM_SEED) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ @@ -1630,6 +1756,40 @@ static inline int ext4_encrypted_inode(struct inode *inode) EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ EXT4_FEATURE_RO_COMPAT_QUOTA) +#define EXTN_FEATURE_FUNCS(ver) \ +static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_compat & \ + cpu_to_le32(~EXT##ver##_FEATURE_COMPAT_SUPP)) != 0); \ +} \ +static inline bool ext4_has_unknown_ext##ver##_ro_compat_features(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_ro_compat & \ + cpu_to_le32(~EXT##ver##_FEATURE_RO_COMPAT_SUPP)) != 0); \ +} \ +static inline bool ext4_has_unknown_ext##ver##_incompat_features(struct super_block *sb) \ +{ \ + return ((EXT4_SB(sb)->s_es->s_feature_incompat & \ + cpu_to_le32(~EXT##ver##_FEATURE_INCOMPAT_SUPP)) != 0); \ +} + +EXTN_FEATURE_FUNCS(2) +EXTN_FEATURE_FUNCS(3) +EXTN_FEATURE_FUNCS(4) + +static inline bool ext4_has_compat_features(struct super_block *sb) +{ + return (EXT4_SB(sb)->s_es->s_feature_compat != 0); +} +static inline bool ext4_has_ro_compat_features(struct super_block *sb) +{ + return (EXT4_SB(sb)->s_es->s_feature_ro_compat != 0); +} +static inline bool ext4_has_incompat_features(struct super_block *sb) +{ + return (EXT4_SB(sb)->s_es->s_feature_incompat != 0); +} + /* * Default values for user and/or group using reserved blocks */ @@ -1780,8 +1940,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) * (c) Daniel Phillips, 2001 */ -#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ - EXT4_FEATURE_COMPAT_DIR_INDEX) && \ +#define is_dx(dir) (ext4_has_feature_dir_index((dir)->i_sb) && \ ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) @@ -1838,6 +1997,17 @@ struct dx_hash_info */ #define HASH_NB_ALWAYS 1 +struct ext4_filename { + const struct qstr *usr_fname; + struct ext4_str disk_name; + struct dx_hash_info hinfo; +#ifdef CONFIG_EXT4_FS_ENCRYPTION + struct ext4_str crypto_buf; +#endif +}; + +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) /* * Describe an inode's exact location on disk and in memory @@ -2054,6 +2224,7 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy); /* crypto.c */ +extern struct kmem_cache *ext4_crypt_info_cachep; bool ext4_valid_contents_enc_mode(uint32_t mode); uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size); extern struct workqueue_struct *ext4_read_workqueue; @@ -2062,8 +2233,7 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx); void ext4_restore_control_page(struct page *data_page); struct page *ext4_encrypt(struct inode *inode, struct page *plaintext_page); -int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page); -int ext4_decrypt_one(struct inode *inode, struct page *page); +int ext4_decrypt(struct page *page); int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); #ifdef CONFIG_EXT4_FS_ENCRYPTION @@ -2071,7 +2241,7 @@ int ext4_init_crypto(void); void ext4_exit_crypto(void); static inline int ext4_sb_has_crypto(struct super_block *sb) { - return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT); + return ext4_has_feature_encrypt(sb); } #else static inline int ext4_init_crypto(void) { return 0; } @@ -2085,57 +2255,84 @@ static inline int ext4_sb_has_crypto(struct super_block *sb) /* crypto_fname.c */ bool ext4_valid_filenames_enc_mode(uint32_t mode); u32 ext4_fname_crypto_round_up(u32 size, u32 blksize); -int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx, +unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen); +int ext4_fname_crypto_alloc_buffer(struct inode *inode, u32 ilen, struct ext4_str *crypto_str); -int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, +int _ext4_fname_disk_to_usr(struct inode *inode, struct dx_hash_info *hinfo, const struct ext4_str *iname, struct ext4_str *oname); -int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, +int ext4_fname_disk_to_usr(struct inode *inode, struct dx_hash_info *hinfo, const struct ext4_dir_entry_2 *de, struct ext4_str *oname); -int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx, +int ext4_fname_usr_to_disk(struct inode *inode, const struct qstr *iname, struct ext4_str *oname); -int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx, - const struct qstr *iname, - struct dx_hash_info *hinfo); -int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx, - u32 namelen); -int ext4_fname_match(struct ext4_fname_crypto_ctx *ctx, struct ext4_str *cstr, - int len, const char * const name, - struct ext4_dir_entry_2 *de); - - #ifdef CONFIG_EXT4_FS_ENCRYPTION -void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx); -struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode, - u32 max_len); void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str); +int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct ext4_filename *fname); +void ext4_fname_free_filename(struct ext4_filename *fname); #else static inline -void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) { } -static inline -struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode, - u32 max_len) +int ext4_setup_fname_crypto(struct inode *inode) { - return NULL; + return 0; } static inline void ext4_fname_crypto_free_buffer(struct ext4_str *p) { } +static inline int ext4_fname_setup_filename(struct inode *dir, + const struct qstr *iname, + int lookup, struct ext4_filename *fname) +{ + fname->usr_fname = iname; + fname->disk_name.name = (unsigned char *) iname->name; + fname->disk_name.len = iname->len; + return 0; +} +static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } #endif /* crypto_key.c */ -int ext4_generate_encryption_key(struct inode *inode); +void ext4_free_crypt_info(struct ext4_crypt_info *ci); +void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci); +int _ext4_get_encryption_info(struct inode *inode); #ifdef CONFIG_EXT4_FS_ENCRYPTION int ext4_has_encryption_key(struct inode *inode); + +static inline int ext4_get_encryption_info(struct inode *inode) +{ + struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; + + if (!ci || + (ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD))))) + return _ext4_get_encryption_info(inode); + return 0; +} + +static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) +{ + return EXT4_I(inode)->i_crypt_info; +} + #else static inline int ext4_has_encryption_key(struct inode *inode) { return 0; } +static inline int ext4_get_encryption_info(struct inode *inode) +{ + return 0; +} +static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) +{ + return NULL; +} #endif @@ -2156,18 +2353,16 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p); extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, struct buffer_head *bh, void *buf, int buf_size, - const char *name, int namelen, + struct ext4_filename *fname, struct ext4_dir_entry_2 **dest_de); int ext4_insert_dentry(struct inode *dir, - struct inode *inode, - struct ext4_dir_entry_2 *de, - int buf_size, - const struct qstr *iname, - const char *name, int namelen); + struct inode *inode, + struct ext4_dir_entry_2 *de, + int buf_size, + struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_COMPAT_DIR_INDEX)) + if (!ext4_has_feature_dir_index(inode->i_sb)) ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); } static unsigned char ext4_filetype_table[] = { @@ -2176,8 +2371,7 @@ static unsigned char ext4_filetype_table[] = { static inline unsigned char get_dtype(struct super_block *sb, int filetype) { - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || - (filetype >= EXT4_FT_MAX)) + if (!ext4_has_feature_filetype(sb) || filetype >= EXT4_FT_MAX) return DT_UNKNOWN; return ext4_filetype_table[filetype]; @@ -2218,6 +2412,7 @@ extern int ext4_init_inode_table(struct super_block *sb, extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); /* mballoc.c */ +extern const struct file_operations ext4_seq_mb_groups_fops; extern long ext4_mb_stats; extern long ext4_mb_max_to_scan; extern int ext4_mb_init(struct super_block *); @@ -2245,6 +2440,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); +int ext4_get_block_dax(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, @@ -2317,13 +2514,14 @@ extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, __u32 start_minor_hash, __u32 *next_hash); -extern int search_dir(struct buffer_head *bh, - char *search_buf, - int buf_size, - struct inode *dir, - const struct qstr *d_name, - unsigned int offset, - struct ext4_dir_entry_2 **res_dir); +extern int ext4_search_dir(struct buffer_head *bh, + char *search_buf, + int buf_size, + struct inode *dir, + struct ext4_filename *fname, + const struct qstr *d_name, + unsigned int offset, + struct ext4_dir_entry_2 **res_dir); extern int ext4_generic_delete_entry(handle_t *handle, struct inode *dir, struct ext4_dir_entry_2 *de_del, @@ -2342,6 +2540,7 @@ extern int ext4_group_extend(struct super_block *sb, extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ +extern int ext4_seq_options_show(struct seq_file *seq, void *offset); extern int ext4_calculate_overhead(struct super_block *sb); extern void ext4_superblock_csum_set(struct super_block *sb); extern void *ext4_kvmalloc(size_t size, gfp_t flags); @@ -2368,6 +2567,9 @@ void __ext4_abort(struct super_block *, const char *, unsigned int, extern __printf(4, 5) void __ext4_warning(struct super_block *, const char *, unsigned int, const char *, ...); +extern __printf(4, 5) +void __ext4_warning_inode(const struct inode *inode, const char *function, + unsigned int line, const char *fmt, ...); extern __printf(3, 4) void __ext4_msg(struct super_block *, const char *, const char *, ...); extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, @@ -2378,6 +2580,15 @@ void __ext4_grp_locked_error(const char *, unsigned int, unsigned long, ext4_fsblk_t, const char *, ...); +#define EXT4_ERROR_INODE(inode, fmt, a...) \ + ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a) + +#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \ + ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a) + +#define EXT4_ERROR_FILE(file, block, fmt, a...) \ + ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a) + #ifdef CONFIG_PRINTK #define ext4_error_inode(inode, func, line, block, fmt, ...) \ @@ -2390,6 +2601,8 @@ void __ext4_grp_locked_error(const char *, unsigned int, __ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) #define ext4_warning(sb, fmt, ...) \ __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) +#define ext4_warning_inode(inode, fmt, ...) \ + __ext4_warning_inode(inode, __func__, __LINE__, fmt, ##__VA_ARGS__) #define ext4_msg(sb, level, fmt, ...) \ __ext4_msg(sb, level, fmt, ##__VA_ARGS__) #define dump_mmp_msg(sb, mmp, msg) \ @@ -2425,6 +2638,11 @@ do { \ no_printk(fmt, ##__VA_ARGS__); \ __ext4_warning(sb, "", 0, " "); \ } while (0) +#define ext4_warning_inode(inode, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_warning_inode(inode, "", 0, " "); \ +} while (0) #define ext4_msg(sb, level, fmt, ...) \ do { \ no_printk(fmt, ##__VA_ARGS__); \ @@ -2485,15 +2703,13 @@ extern int ext4_register_li_request(struct super_block *sb, static inline int ext4_has_group_desc_csum(struct super_block *sb) { - return EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_GDT_CSUM) || - (EXT4_SB(sb)->s_chksum_driver != NULL); + return ext4_has_feature_gdt_csum(sb) || + EXT4_SB(sb)->s_chksum_driver != NULL; } static inline int ext4_has_metadata_csum(struct super_block *sb) { - WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) && !EXT4_SB(sb)->s_chksum_driver); return (EXT4_SB(sb)->s_chksum_driver != NULL); @@ -2768,7 +2984,9 @@ extern int ext4_da_write_inline_data_begin(struct address_space *mapping, extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page); -extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, +extern int ext4_try_add_inline_entry(handle_t *handle, + struct ext4_filename *fname, + struct dentry *dentry, struct inode *inode); extern int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, @@ -2782,6 +3000,7 @@ extern int htree_inlinedir_to_tree(struct file *dir_file, __u32 start_hash, __u32 start_minor_hash, int *has_inline_data); extern struct buffer_head *ext4_find_inline_entry(struct inode *dir, + struct ext4_filename *fname, const struct qstr *d_name, struct ext4_dir_entry_2 **res_dir, int *has_inline_data); @@ -2837,7 +3056,7 @@ static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { static inline void ext4_set_de_type(struct super_block *sb, struct ext4_dir_entry_2 *de, umode_t mode) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE)) + if (ext4_has_feature_filetype(sb)) de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; } @@ -2847,9 +3066,16 @@ extern int ext4_mpage_readpages(struct address_space *mapping, unsigned nr_pages); /* symlink.c */ +extern const struct inode_operations ext4_encrypted_symlink_inode_operations; extern const struct inode_operations ext4_symlink_inode_operations; extern const struct inode_operations ext4_fast_symlink_inode_operations; +/* sysfs.c */ +extern int ext4_register_sysfs(struct super_block *sb); +extern void ext4_unregister_sysfs(struct super_block *sb); +extern int __init ext4_init_sysfs(void); +extern void ext4_exit_sysfs(void); + /* block_validity */ extern void ext4_release_system_zone(struct super_block *sb); extern int ext4_setup_system_zone(struct super_block *sb); @@ -2912,6 +3138,7 @@ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); extern int ext4_ext_precache(struct inode *inode); extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); +extern int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len); extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2, ext4_lblk_t count, @@ -2995,4 +3222,7 @@ extern void ext4_resize_end(struct super_block *sb); #endif /* __KERNEL__ */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + #endif /* _EXT4_H */ diff --git a/kernel/fs/ext4/ext4_crypto.h b/kernel/fs/ext4/ext4_crypto.h index d75159c10..ac7d4e813 100644 --- a/kernel/fs/ext4/ext4_crypto.h +++ b/kernel/fs/ext4/ext4_crypto.h @@ -66,24 +66,39 @@ struct ext4_encryption_context { #define EXT4_KEY_DESC_PREFIX "ext4:" #define EXT4_KEY_DESC_PREFIX_SIZE 5 +/* This is passed in from userspace into the kernel keyring */ struct ext4_encryption_key { - uint32_t mode; - char raw[EXT4_MAX_KEY_SIZE]; - uint32_t size; + __u32 mode; + char raw[EXT4_MAX_KEY_SIZE]; + __u32 size; +} __attribute__((__packed__)); + +struct ext4_crypt_info { + char ci_data_mode; + char ci_filename_mode; + char ci_flags; + struct crypto_ablkcipher *ci_ctfm; + struct key *ci_keyring_key; + char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE]; }; #define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 -#define EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002 +#define EXT4_WRITE_PATH_FL 0x00000002 struct ext4_crypto_ctx { - struct crypto_tfm *tfm; /* Crypto API context */ - struct page *bounce_page; /* Ciphertext page on write path */ - struct page *control_page; /* Original page on write path */ - struct bio *bio; /* The bio for this context */ - struct work_struct work; /* Work queue for read complete path */ - struct list_head free_list; /* Free list */ - int flags; /* Flags */ - int mode; /* Encryption mode for tfm */ + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + char flags; /* Flags */ + char mode; /* Encryption mode for tfm */ }; struct ext4_completion_result { @@ -121,18 +136,6 @@ struct ext4_str { u32 len; }; -struct ext4_fname_crypto_ctx { - u32 lim; - char tmp_buf[EXT4_CRYPTO_BLOCK_SIZE]; - struct crypto_ablkcipher *ctfm; - struct crypto_hash *htfm; - struct page *workpage; - struct ext4_encryption_key key; - unsigned flags : 8; - unsigned has_valid_key : 1; - unsigned ctfm_key_is_ready : 1; -}; - /** * For encrypted symlinks, the ciphertext length is stored at the beginning * of the string in little-endian format. diff --git a/kernel/fs/ext4/ext4_jbd2.c b/kernel/fs/ext4/ext4_jbd2.c index d41843181..e770c1ee4 100644 --- a/kernel/fs/ext4/ext4_jbd2.c +++ b/kernel/fs/ext4/ext4_jbd2.c @@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) return 0; } + err = handle->h_err; if (!handle->h_transaction) { - err = jbd2_journal_stop(handle); - return handle->h_err ? handle->h_err : err; + rc = jbd2_journal_stop(handle); + return err ? err : rc; } sb = handle->h_transaction->t_journal->j_private; - err = handle->h_err; rc = jbd2_journal_stop(handle); if (!err) diff --git a/kernel/fs/ext4/ext4_jbd2.h b/kernel/fs/ext4/ext4_jbd2.h index 9c5b49fb2..5f5846211 100644 --- a/kernel/fs/ext4/ext4_jbd2.h +++ b/kernel/fs/ext4/ext4_jbd2.h @@ -34,8 +34,7 @@ */ #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ - (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ - ? 20U : 8U) + (ext4_has_feature_extents(sb) ? 20U : 8U) /* Extended attribute operations touch at most two data buffers, * two bitmap buffers, and two group summaries, in addition to the inode @@ -84,17 +83,16 @@ /* Amount of blocks needed for quota update - we know that the structure was * allocated so we need to update only data block */ #define EXT4_QUOTA_TRANS_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ - 1 : 0) + ext4_has_feature_quota(sb)) ? 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + ext4_has_feature_quota(sb)) ?\ (DQUOT_INIT_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_INIT_REWRITE) : 0) #define EXT4_QUOTA_DEL_BLOCKS(sb) ((test_opt(sb, QUOTA) ||\ - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) ?\ + ext4_has_feature_quota(sb)) ?\ (DQUOT_DEL_ALLOC*(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)\ +3+DQUOT_DEL_REWRITE) : 0) #else diff --git a/kernel/fs/ext4/extents.c b/kernel/fs/ext4/extents.c index 87ba10d1d..551353b1b 100644 --- a/kernel/fs/ext4/extents.c +++ b/kernel/fs/ext4/extents.c @@ -39,6 +39,7 @@ #include <linux/slab.h> #include <asm/uaccess.h> #include <linux/fiemap.h> +#include <linux/backing-dev.h> #include "ext4_jbd2.h" #include "ext4_extents.h" #include "xattr.h" @@ -441,7 +442,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, int depth, ext4_fsblk_t pblk) { const char *error_msg; - int max = 0; + int max = 0, err = -EFSCORRUPTED; if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { error_msg = "invalid magic"; @@ -472,6 +473,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, if (ext_depth(inode) != depth && !ext4_extent_block_csum_verify(inode, eh)) { error_msg = "extent tree corrupted"; + err = -EFSBADCRC; goto corrupted; } return 0; @@ -484,7 +486,7 @@ corrupted: le16_to_cpu(eh->eh_magic), le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), max, le16_to_cpu(eh->eh_depth), depth); - return -EIO; + return err; } #define ext4_ext_check(inode, eh, depth, pblk) \ @@ -898,7 +900,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, bh = read_extent_tree_block(inode, path[ppos].p_block, --i, flags); - if (unlikely(IS_ERR(bh))) { + if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; } @@ -909,7 +911,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, put_bh(bh); EXT4_ERROR_INODE(inode, "ppos %d > depth %d", ppos, depth); - ret = -EIO; + ret = -EFSCORRUPTED; goto err; } path[ppos].p_bh = bh; @@ -958,7 +960,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, EXT4_ERROR_INODE(inode, "logical %d == ei_block %d!", logical, le32_to_cpu(curp->p_idx->ei_block)); - return -EIO; + return -EFSCORRUPTED; } if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) @@ -967,7 +969,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, "eh_entries %d >= eh_max %d!", le16_to_cpu(curp->p_hdr->eh_entries), le16_to_cpu(curp->p_hdr->eh_max)); - return -EIO; + return -EFSCORRUPTED; } if (logical > le32_to_cpu(curp->p_idx->ei_block)) { @@ -991,7 +993,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); - return -EIO; + return -EFSCORRUPTED; } ix->ei_block = cpu_to_le32(logical); @@ -1000,7 +1002,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); - return -EIO; + return -EFSCORRUPTED; } err = ext4_ext_dirty(handle, inode, curp); @@ -1041,7 +1043,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, * border from split point */ if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); - return -EIO; + return -EFSCORRUPTED; } if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { border = path[depth].p_ext[1].ee_block; @@ -1085,7 +1087,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, newblock = ablocks[--a]; if (unlikely(newblock == 0)) { EXT4_ERROR_INODE(inode, "newblock == 0!"); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); @@ -1111,7 +1113,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", path[depth].p_hdr->eh_entries, path[depth].p_hdr->eh_max); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } /* start copy from next extent */ @@ -1150,7 +1152,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, k = depth - at - 1; if (unlikely(k < 0)) { EXT4_ERROR_INODE(inode, "k %d < 0!", k); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } if (k) @@ -1190,7 +1192,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, EXT4_ERROR_INODE(inode, "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", le32_to_cpu(path[i].p_ext->ee_block)); - err = -EIO; + err = -EFSCORRUPTED; goto cleanup; } /* start copy indexes */ @@ -1424,7 +1426,7 @@ static int ext4_ext_search_left(struct inode *inode, if (unlikely(path == NULL)) { EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); - return -EIO; + return -EFSCORRUPTED; } depth = path->p_depth; *phys = 0; @@ -1443,7 +1445,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT4_ERROR_INODE(inode, "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", *logical, le32_to_cpu(ex->ee_block)); - return -EIO; + return -EFSCORRUPTED; } while (--depth >= 0) { ix = path[depth].p_idx; @@ -1454,7 +1456,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0, depth); - return -EIO; + return -EFSCORRUPTED; } } return 0; @@ -1464,7 +1466,7 @@ static int ext4_ext_search_left(struct inode *inode, EXT4_ERROR_INODE(inode, "logical %d < ee_block %d + ee_len %d!", *logical, le32_to_cpu(ex->ee_block), ee_len); - return -EIO; + return -EFSCORRUPTED; } *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; @@ -1494,7 +1496,7 @@ static int ext4_ext_search_right(struct inode *inode, if (unlikely(path == NULL)) { EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); - return -EIO; + return -EFSCORRUPTED; } depth = path->p_depth; *phys = 0; @@ -1513,7 +1515,7 @@ static int ext4_ext_search_right(struct inode *inode, EXT4_ERROR_INODE(inode, "first_extent(path[%d].p_hdr) != ex", depth); - return -EIO; + return -EFSCORRUPTED; } while (--depth >= 0) { ix = path[depth].p_idx; @@ -1521,7 +1523,7 @@ static int ext4_ext_search_right(struct inode *inode, EXT4_ERROR_INODE(inode, "ix != EXT_FIRST_INDEX *logical %d!", *logical); - return -EIO; + return -EFSCORRUPTED; } } goto found_extent; @@ -1531,7 +1533,7 @@ static int ext4_ext_search_right(struct inode *inode, EXT4_ERROR_INODE(inode, "logical %d < ee_block %d + ee_len %d!", *logical, le32_to_cpu(ex->ee_block), ee_len); - return -EIO; + return -EFSCORRUPTED; } if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { @@ -1669,7 +1671,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, if (unlikely(ex == NULL || eh == NULL)) { EXT4_ERROR_INODE(inode, "ex %p == NULL or eh %p == NULL", ex, eh); - return -EIO; + return -EFSCORRUPTED; } if (depth == 0) { @@ -1937,14 +1939,14 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, mb_flags |= EXT4_MB_DELALLOC_RESERVED; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); - return -EIO; + return -EFSCORRUPTED; } depth = ext_depth(inode); ex = path[depth].p_ext; eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - return -EIO; + return -EFSCORRUPTED; } /* try to insert block into found extent and return */ @@ -2171,7 +2173,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, if (unlikely(path[depth].p_hdr == NULL)) { up_read(&EXT4_I(inode)->i_data_sem); EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - err = -EIO; + err = -EFSCORRUPTED; break; } ex = path[depth].p_ext; @@ -2240,7 +2242,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, if (unlikely(es.es_len == 0)) { EXT4_ERROR_INODE(inode, "es.es_len == 0"); - err = -EIO; + err = -EFSCORRUPTED; break; } @@ -2263,7 +2265,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, "next extent == %u, next " "delalloc extent = %u", next, next_del); - err = -EIO; + err = -EFSCORRUPTED; break; } } @@ -2362,7 +2364,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, leaf = ext4_idx_pblock(path->p_idx); if (unlikely(path->p_hdr->eh_entries == 0)) { EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); - return -EIO; + return -EFSCORRUPTED; } err = ext4_ext_get_access(handle, inode, path); if (err) @@ -2611,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - return -EIO; + return -EFSCORRUPTED; } /* find where to start removing */ ex = path[depth].p_ext; @@ -2665,7 +2667,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, "on extent %u:%u", start, end, ex_ee_block, ex_ee_block + ex_ee_len - 1); - err = -EIO; + err = -EFSCORRUPTED; goto out; } else if (a != ex_ee_block) { /* remove tail of the extent */ @@ -2840,7 +2842,7 @@ again: EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - err = -EIO; + err = -EFSCORRUPTED; } goto out; } @@ -2919,7 +2921,7 @@ again: i = 0; if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) { - err = -EIO; + err = -EFSCORRUPTED; goto out; } } @@ -2977,7 +2979,7 @@ again: * Should be a no-op if we did IO above. */ cond_resched(); if (WARN_ON(i + 1 > depth)) { - err = -EIO; + err = -EFSCORRUPTED; break; } path[i + 1].p_bh = bh; @@ -3053,7 +3055,7 @@ void ext4_ext_init(struct super_block *sb) * possible initialization would be here */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(sb)) { #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) printk(KERN_INFO "EXT4-fs: file extents enabled" #ifdef AGGRESSIVE_TEST @@ -3080,7 +3082,7 @@ void ext4_ext_init(struct super_block *sb) */ void ext4_ext_release(struct super_block *sb) { - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) + if (!ext4_has_feature_extents(sb)) return; #ifdef EXTENTS_STATS @@ -3344,7 +3346,7 @@ static int ext4_split_extent(handle_t *handle, if (!ex) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) map->m_lblk); - return -EIO; + return -EFSCORRUPTED; } unwritten = ext4_ext_is_unwritten(ex); split_flag1 = 0; @@ -3557,6 +3559,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, max_zeroout = sbi->s_extent_max_zeroout_kb >> (inode->i_sb->s_blocksize_bits - 10); + if (ext4_encrypted_inode(inode)) + max_zeroout = 0; + /* If extent is less than s_max_zeroout_kb, zeroout directly */ if (max_zeroout && (ee_len <= max_zeroout)) { err = ext4_ext_zeroout(inode, ex); @@ -3969,7 +3974,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, if (!ex) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) map->m_lblk); - return -EIO; + return -EFSCORRUPTED; } } @@ -4307,7 +4312,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, "lblock: %lu, depth: %d pblock %lld", (unsigned long) map->m_lblk, depth, path[depth].p_block); - err = -EIO; + err = -EFSCORRUPTED; goto out2; } @@ -4456,6 +4461,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ar.flags |= EXT4_MB_HINT_NOPREALLOC; if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ar.flags |= EXT4_MB_DELALLOC_RESERVED; + if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) + ar.flags |= EXT4_MB_USE_RESERVED; newblock = ext4_mb_new_blocks(handle, &ar, &err); if (!newblock) goto out2; @@ -4663,6 +4670,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, int ret = 0; int ret2 = 0; int retries = 0; + int depth = 0; struct ext4_map_blocks map; unsigned int credits; loff_t epos; @@ -4677,13 +4685,32 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, if (len <= EXT_UNWRITTEN_MAX_LEN) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + /* * credits to insert 1 extent into extent tree */ credits = ext4_chunk_trans_blocks(inode, len); + /* + * We can only call ext_depth() on extent based inodes + */ + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + depth = ext_depth(inode); + else + depth = -1; retry: while (ret >= 0 && len) { + /* + * Recalculate credits when extent tree depth changes. + */ + if (depth >= 0 && depth != ext_depth(inode)) { + credits = ext4_chunk_trans_blocks(inode, len); + depth = ext_depth(inode); + } + handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); if (IS_ERR(handle)) { @@ -4725,6 +4752,8 @@ retry: goto retry; } + ext4_inode_resume_unlocked_dio(inode); + return ret > 0 ? ret2 : ret; } @@ -4912,12 +4941,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) * bug we should fix.... */ if (ext4_encrypted_inode(inode) && - (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))) + (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE | + FALLOC_FL_ZERO_RANGE))) return -EOPNOTSUPP; /* Return error if mode is not supported */ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | + FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) @@ -4930,6 +4961,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (mode & FALLOC_FL_COLLAPSE_RANGE) return ext4_collapse_range(inode, offset, len); + if (mode & FALLOC_FL_INSERT_RANGE) + return ext4_insert_range(inode, offset, len); + if (mode & FALLOC_FL_ZERO_RANGE) return ext4_zero_range(file, offset, len, mode); @@ -5224,13 +5258,13 @@ ext4_access_path(handle_t *handle, struct inode *inode, /* * ext4_ext_shift_path_extents: * Shift the extents of a path structure lying between path[depth].p_ext - * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift - * from starting block for each extent. + * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells + * if it is right shift or left shift operation. */ static int ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, struct inode *inode, handle_t *handle, - ext4_lblk_t *start) + enum SHIFT_DIRECTION SHIFT) { int depth, err = 0; struct ext4_extent *ex_start, *ex_last; @@ -5241,7 +5275,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, if (depth == path->p_depth) { ex_start = path[depth].p_ext; if (!ex_start) - return -EIO; + return -EFSCORRUPTED; ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); @@ -5252,19 +5286,25 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) update = 1; - *start = le32_to_cpu(ex_last->ee_block) + - ext4_ext_get_actual_len(ex_last); - while (ex_start <= ex_last) { - le32_add_cpu(&ex_start->ee_block, -shift); - /* Try to merge to the left. */ - if ((ex_start > - EXT_FIRST_EXTENT(path[depth].p_hdr)) && - ext4_ext_try_to_merge_right(inode, - path, ex_start - 1)) + if (SHIFT == SHIFT_LEFT) { + le32_add_cpu(&ex_start->ee_block, + -shift); + /* Try to merge to the left. */ + if ((ex_start > + EXT_FIRST_EXTENT(path[depth].p_hdr)) + && + ext4_ext_try_to_merge_right(inode, + path, ex_start - 1)) + ex_last--; + else + ex_start++; + } else { + le32_add_cpu(&ex_last->ee_block, shift); + ext4_ext_try_to_merge_right(inode, path, + ex_last); ex_last--; - else - ex_start++; + } } err = ext4_ext_dirty(handle, inode, path + depth); if (err) @@ -5279,7 +5319,10 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, if (err) goto out; - le32_add_cpu(&path[depth].p_idx->ei_block, -shift); + if (SHIFT == SHIFT_LEFT) + le32_add_cpu(&path[depth].p_idx->ei_block, -shift); + else + le32_add_cpu(&path[depth].p_idx->ei_block, shift); err = ext4_ext_dirty(handle, inode, path + depth); if (err) goto out; @@ -5297,19 +5340,20 @@ out: /* * ext4_ext_shift_extents: - * All the extents which lies in the range from start to the last allocated - * block for the file are shifted downwards by shift blocks. + * All the extents which lies in the range from @start to the last allocated + * block for the @inode are shifted either towards left or right (depending + * upon @SHIFT) by @shift blocks. * On success, 0 is returned, error otherwise. */ static int ext4_ext_shift_extents(struct inode *inode, handle_t *handle, - ext4_lblk_t start, ext4_lblk_t shift) + ext4_lblk_t start, ext4_lblk_t shift, + enum SHIFT_DIRECTION SHIFT) { struct ext4_ext_path *path; int ret = 0, depth; struct ext4_extent *extent; - ext4_lblk_t stop_block; - ext4_lblk_t ex_start, ex_end; + ext4_lblk_t stop, *iterator, ex_start, ex_end; /* Let path point to the last extent */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); @@ -5321,58 +5365,84 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, if (!extent) goto out; - stop_block = le32_to_cpu(extent->ee_block) + + stop = le32_to_cpu(extent->ee_block) + ext4_ext_get_actual_len(extent); - /* Nothing to shift, if hole is at the end of file */ - if (start >= stop_block) - goto out; + /* + * In case of left shift, Don't start shifting extents until we make + * sure the hole is big enough to accommodate the shift. + */ + if (SHIFT == SHIFT_LEFT) { + path = ext4_find_extent(inode, start - 1, &path, 0); + if (IS_ERR(path)) + return PTR_ERR(path); + depth = path->p_depth; + extent = path[depth].p_ext; + if (extent) { + ex_start = le32_to_cpu(extent->ee_block); + ex_end = le32_to_cpu(extent->ee_block) + + ext4_ext_get_actual_len(extent); + } else { + ex_start = 0; + ex_end = 0; + } - /* - * Don't start shifting extents until we make sure the hole is big - * enough to accomodate the shift. - */ - path = ext4_find_extent(inode, start - 1, &path, 0); - if (IS_ERR(path)) - return PTR_ERR(path); - depth = path->p_depth; - extent = path[depth].p_ext; - if (extent) { - ex_start = le32_to_cpu(extent->ee_block); - ex_end = le32_to_cpu(extent->ee_block) + - ext4_ext_get_actual_len(extent); - } else { - ex_start = 0; - ex_end = 0; + if ((start == ex_start && shift > ex_start) || + (shift > start - ex_end)) { + ext4_ext_drop_refs(path); + kfree(path); + return -EINVAL; + } } - if ((start == ex_start && shift > ex_start) || - (shift > start - ex_end)) - return -EINVAL; + /* + * In case of left shift, iterator points to start and it is increased + * till we reach stop. In case of right shift, iterator points to stop + * and it is decreased till we reach start. + */ + if (SHIFT == SHIFT_LEFT) + iterator = &start; + else + iterator = &stop; /* Its safe to start updating extents */ - while (start < stop_block) { - path = ext4_find_extent(inode, start, &path, 0); + while (start < stop) { + path = ext4_find_extent(inode, *iterator, &path, 0); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; extent = path[depth].p_ext; if (!extent) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", - (unsigned long) start); - return -EIO; + (unsigned long) *iterator); + return -EFSCORRUPTED; } - if (start > le32_to_cpu(extent->ee_block)) { + if (SHIFT == SHIFT_LEFT && *iterator > + le32_to_cpu(extent->ee_block)) { /* Hole, move to the next extent */ if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) { path[depth].p_ext++; } else { - start = ext4_ext_next_allocated_block(path); + *iterator = ext4_ext_next_allocated_block(path); continue; } } + + if (SHIFT == SHIFT_LEFT) { + extent = EXT_LAST_EXTENT(path[depth].p_hdr); + *iterator = le32_to_cpu(extent->ee_block) + + ext4_ext_get_actual_len(extent); + } else { + extent = EXT_FIRST_EXTENT(path[depth].p_hdr); + *iterator = le32_to_cpu(extent->ee_block) > 0 ? + le32_to_cpu(extent->ee_block) - 1 : 0; + /* Update path extent in case we need to stop */ + while (le32_to_cpu(extent->ee_block) < start) + extent++; + path[depth].p_ext = extent; + } ret = ext4_ext_shift_path_extents(path, shift, inode, - handle, &start); + handle, SHIFT); if (ret) break; } @@ -5485,7 +5555,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ext4_discard_preallocations(inode); ret = ext4_ext_shift_extents(inode, handle, punch_stop, - punch_stop - punch_start); + punch_stop - punch_start, SHIFT_LEFT); if (ret) { up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; @@ -5510,6 +5580,174 @@ out_mutex: return ret; } +/* + * ext4_insert_range: + * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate. + * The data blocks starting from @offset to the EOF are shifted by @len + * towards right to create a hole in the @inode. Inode size is increased + * by len bytes. + * Returns 0 on success, error otherwise. + */ +int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) +{ + struct super_block *sb = inode->i_sb; + handle_t *handle; + struct ext4_ext_path *path; + struct ext4_extent *extent; + ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0; + unsigned int credits, ee_len; + int ret = 0, depth, split_flag = 0; + loff_t ioffset; + + /* + * We need to test this early because xfstests assumes that an + * insert range of (0, 1) will return EOPNOTSUPP if the file + * system does not support insert range. + */ + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + return -EOPNOTSUPP; + + /* Insert range works only on fs block size aligned offsets. */ + if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) || + len & (EXT4_CLUSTER_SIZE(sb) - 1)) + return -EINVAL; + + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + trace_ext4_insert_range(inode, offset, len); + + offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb); + len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb); + + /* Call ext4_force_commit to flush all data in case of data=journal */ + if (ext4_should_journal_data(inode)) { + ret = ext4_force_commit(inode->i_sb); + if (ret) + return ret; + } + + /* + * Need to round down to align start offset to page size boundary + * for page size > block size. + */ + ioffset = round_down(offset, PAGE_SIZE); + + /* Write out all dirty pages */ + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, + LLONG_MAX); + if (ret) + return ret; + + /* Take mutex lock */ + mutex_lock(&inode->i_mutex); + + /* Currently just for extent based files */ + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + ret = -EOPNOTSUPP; + goto out_mutex; + } + + /* Check for wrap through zero */ + if (inode->i_size + len > inode->i_sb->s_maxbytes) { + ret = -EFBIG; + goto out_mutex; + } + + /* Offset should be less than i_size */ + if (offset >= i_size_read(inode)) { + ret = -EINVAL; + goto out_mutex; + } + + truncate_pagecache(inode, ioffset); + + /* Wait for existing dio to complete */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + + credits = ext4_writepage_trans_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_dio; + } + + /* Expand file to avoid data loss if there is error while shifting */ + inode->i_size += len; + EXT4_I(inode)->i_disksize += len; + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ret = ext4_mark_inode_dirty(handle, inode); + if (ret) + goto out_stop; + + down_write(&EXT4_I(inode)->i_data_sem); + ext4_discard_preallocations(inode); + + path = ext4_find_extent(inode, offset_lblk, NULL, 0); + if (IS_ERR(path)) { + up_write(&EXT4_I(inode)->i_data_sem); + goto out_stop; + } + + depth = ext_depth(inode); + extent = path[depth].p_ext; + if (extent) { + ee_start_lblk = le32_to_cpu(extent->ee_block); + ee_len = ext4_ext_get_actual_len(extent); + + /* + * If offset_lblk is not the starting block of extent, split + * the extent @offset_lblk + */ + if ((offset_lblk > ee_start_lblk) && + (offset_lblk < (ee_start_lblk + ee_len))) { + if (ext4_ext_is_unwritten(extent)) + split_flag = EXT4_EXT_MARK_UNWRIT1 | + EXT4_EXT_MARK_UNWRIT2; + ret = ext4_split_extent_at(handle, inode, &path, + offset_lblk, split_flag, + EXT4_EX_NOCACHE | + EXT4_GET_BLOCKS_PRE_IO | + EXT4_GET_BLOCKS_METADATA_NOFAIL); + } + + ext4_ext_drop_refs(path); + kfree(path); + if (ret < 0) { + up_write(&EXT4_I(inode)->i_data_sem); + goto out_stop; + } + } + + ret = ext4_es_remove_extent(inode, offset_lblk, + EXT_MAX_BLOCKS - offset_lblk); + if (ret) { + up_write(&EXT4_I(inode)->i_data_sem); + goto out_stop; + } + + /* + * if offset_lblk lies in a hole which is at start of file, use + * ee_start_lblk to shift extents + */ + ret = ext4_ext_shift_extents(inode, handle, + ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk, + len_lblk, SHIFT_RIGHT); + + up_write(&EXT4_I(inode)->i_data_sem); + if (IS_SYNC(inode)) + ext4_handle_sync(handle); + +out_stop: + ext4_journal_stop(handle); +out_dio: + ext4_inode_resume_unlocked_dio(inode); +out_mutex: + mutex_unlock(&inode->i_mutex); + return ret; +} + /** * ext4_swap_extents - Swap extents between two inodes * @@ -5542,7 +5780,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem)); BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem)); BUG_ON(!mutex_is_locked(&inode1->i_mutex)); - BUG_ON(!mutex_is_locked(&inode1->i_mutex)); + BUG_ON(!mutex_is_locked(&inode2->i_mutex)); *erp = ext4_es_remove_extent(inode1, lblk1, count); if (unlikely(*erp)) @@ -5558,7 +5796,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, int split = 0; path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE); - if (unlikely(IS_ERR(path1))) { + if (IS_ERR(path1)) { *erp = PTR_ERR(path1); path1 = NULL; finish: @@ -5566,7 +5804,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, goto repeat; } path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE); - if (unlikely(IS_ERR(path2))) { + if (IS_ERR(path2)) { *erp = PTR_ERR(path2); path2 = NULL; goto finish; diff --git a/kernel/fs/ext4/extents_status.c b/kernel/fs/ext4/extents_status.c index 26724aeec..ac748b3af 100644 --- a/kernel/fs/ext4/extents_status.c +++ b/kernel/fs/ext4/extents_status.c @@ -1089,20 +1089,9 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, return nr_shrunk; } -static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos) +int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v) { - return *pos ? NULL : SEQ_START_TOKEN; -} - -static void * -ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return NULL; -} - -static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) -{ - struct ext4_sb_info *sbi = seq->private; + struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private); struct ext4_es_stats *es_stats = &sbi->s_es_stats; struct ext4_inode_info *ei, *max = NULL; unsigned int inode_cnt = 0; @@ -1143,45 +1132,6 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) return 0; } -static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v) -{ -} - -static const struct seq_operations ext4_es_seq_shrinker_info_ops = { - .start = ext4_es_seq_shrinker_info_start, - .next = ext4_es_seq_shrinker_info_next, - .stop = ext4_es_seq_shrinker_info_stop, - .show = ext4_es_seq_shrinker_info_show, -}; - -static int -ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file) -{ - int ret; - - ret = seq_open(file, &ext4_es_seq_shrinker_info_ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = PDE_DATA(inode); - } - - return ret; -} - -static int -ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file) -{ - return seq_release(inode, file); -} - -static const struct file_operations ext4_es_seq_shrinker_info_fops = { - .owner = THIS_MODULE, - .open = ext4_es_seq_shrinker_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = ext4_es_seq_shrinker_info_release, -}; - int ext4_es_register_shrinker(struct ext4_sb_info *sbi) { int err; @@ -1210,10 +1160,6 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) if (err) goto err2; - if (sbi->s_proc) - proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc, - &ext4_es_seq_shrinker_info_fops, sbi); - return 0; err2: @@ -1225,8 +1171,6 @@ err1: void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) { - if (sbi->s_proc) - remove_proc_entry("es_shrinker_info", sbi->s_proc); percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); unregister_shrinker(&sbi->s_es_shrinker); diff --git a/kernel/fs/ext4/extents_status.h b/kernel/fs/ext4/extents_status.h index 691b52613..f7aa24f46 100644 --- a/kernel/fs/ext4/extents_status.h +++ b/kernel/fs/ext4/extents_status.h @@ -172,4 +172,6 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); +extern int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v); + #endif /* _EXT4_EXTENTS_STATUS_H */ diff --git a/kernel/fs/ext4/file.c b/kernel/fs/ext4/file.c index 0613c256c..113837e7b 100644 --- a/kernel/fs/ext4/file.c +++ b/kernel/fs/ext4/file.c @@ -22,6 +22,7 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/path.h> +#include <linux/dax.h> #include <linux/quotaops.h> #include <linux/pagevec.h> #include <linux/uio.h> @@ -192,19 +193,88 @@ out: } #ifdef CONFIG_FS_DAX +static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) +{ + struct inode *inode = bh->b_assoc_map->host; + /* XXX: breaks on 32-bit > 16TB. Is that even supported? */ + loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; + int err; + if (!uptodate) + return; + WARN_ON(!buffer_unwritten(bh)); + err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); +} + static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_fault(vma, vmf, ext4_get_block); - /* Is this the right get_block? */ + int result; + handle_t *handle = NULL; + struct super_block *sb = file_inode(vma->vm_file)->i_sb; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + if (write) { + sb_start_pagefault(sb); + file_update_time(vma->vm_file); + handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, + EXT4_DATA_TRANS_BLOCKS(sb)); + } + + if (IS_ERR(handle)) + result = VM_FAULT_SIGBUS; + else + result = __dax_fault(vma, vmf, ext4_get_block_dax, + ext4_end_io_unwritten); + + if (write) { + if (!IS_ERR(handle)) + ext4_journal_stop(handle); + sb_end_pagefault(sb); + } + + return result; +} + +static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, unsigned int flags) +{ + int result; + handle_t *handle = NULL; + struct inode *inode = file_inode(vma->vm_file); + struct super_block *sb = inode->i_sb; + bool write = flags & FAULT_FLAG_WRITE; + + if (write) { + sb_start_pagefault(sb); + file_update_time(vma->vm_file); + handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, + ext4_chunk_trans_blocks(inode, + PMD_SIZE / PAGE_SIZE)); + } + + if (IS_ERR(handle)) + result = VM_FAULT_SIGBUS; + else + result = __dax_pmd_fault(vma, addr, pmd, flags, + ext4_get_block_dax, ext4_end_io_unwritten); + + if (write) { + if (!IS_ERR(handle)) + ext4_journal_stop(handle); + sb_end_pagefault(sb); + } + + return result; } static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext4_get_block); + return dax_mkwrite(vma, vmf, ext4_get_block_dax, + ext4_end_io_unwritten); } static const struct vm_operations_struct ext4_dax_vm_ops = { .fault = ext4_dax_fault, + .pmd_fault = ext4_dax_pmd_fault, .page_mkwrite = ext4_dax_mkwrite, .pfn_mkwrite = dax_pfn_mkwrite, }; @@ -223,14 +293,16 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file->f_mapping->host; if (ext4_encrypted_inode(inode)) { - int err = ext4_generate_encryption_key(inode); + int err = ext4_get_encryption_info(inode); if (err) return 0; + if (ext4_encryption_info(inode) == NULL) + return -ENOKEY; } file_accessed(file); if (IS_DAX(file_inode(file))) { vma->vm_ops = &ext4_dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; } else { vma->vm_ops = &ext4_file_vm_ops; } @@ -278,6 +350,13 @@ static int ext4_file_open(struct inode * inode, struct file * filp) ext4_journal_stop(handle); } } + if (ext4_encrypted_inode(inode)) { + ret = ext4_get_encryption_info(inode); + if (ret) + return -EACCES; + if (ext4_encryption_info(inode) == NULL) + return -ENOKEY; + } /* * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present @@ -287,13 +366,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) if (ret < 0) return ret; } - ret = dquot_file_open(inode, filp); - if (!ret && ext4_encrypted_inode(inode)) { - ret = ext4_generate_encryption_key(inode); - if (ret) - ret = -EACCES; - } - return ret; + return dquot_file_open(inode, filp); } /* diff --git a/kernel/fs/ext4/ialloc.c b/kernel/fs/ext4/ialloc.c index 1eaa6cb96..53f2b98a6 100644 --- a/kernel/fs/ext4/ialloc.c +++ b/kernel/fs/ext4/ialloc.c @@ -64,7 +64,7 @@ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) } /* Initializes an uninitialized inode bitmap */ -static unsigned ext4_init_inode_bitmap(struct super_block *sb, +static int ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_group_t block_group, struct ext4_group_desc *gdp) @@ -76,7 +76,6 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - ext4_error(sb, "Checksum bad for group %u", block_group); grp = ext4_get_group_info(sb, block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, @@ -89,7 +88,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, count); } set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return 0; + return -EFSBADCRC; } memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); @@ -99,7 +98,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, EXT4_INODES_PER_GROUP(sb) / 8); ext4_group_desc_csum_set(sb, block_group, gdp); - return EXT4_INODES_PER_GROUP(sb); + return 0; } void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) @@ -112,6 +111,42 @@ void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) put_bh(bh); } +static int ext4_validate_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + ext4_group_t block_group, + struct buffer_head *bh) +{ + ext4_fsblk_t blk; + struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (buffer_verified(bh)) + return 0; + if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; + + ext4_lock_group(sb, block_group); + blk = ext4_inode_bitmap(sb, desc); + if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, + EXT4_INODES_PER_GROUP(sb) / 8)) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " + "inode_bitmap = %llu", block_group, blk); + grp = ext4_get_group_info(sb, block_group); + if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { + int count; + count = ext4_free_inodes_count(sb, desc); + percpu_counter_sub(&sbi->s_freeinodes_counter, + count); + } + set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); + return -EFSBADCRC; + } + set_buffer_verified(bh); + ext4_unlock_group(sb, block_group); + return 0; +} + /* * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. @@ -124,12 +159,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) struct ext4_group_desc *desc; struct buffer_head *bh = NULL; ext4_fsblk_t bitmap_blk; - struct ext4_group_info *grp; - struct ext4_sb_info *sbi = EXT4_SB(sb); + int err; desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) - return NULL; + return ERR_PTR(-EFSCORRUPTED); bitmap_blk = ext4_inode_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); @@ -137,7 +171,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_error(sb, "Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); - return NULL; + return ERR_PTR(-EIO); } if (bitmap_uptodate(bh)) goto verify; @@ -150,12 +184,17 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - ext4_init_inode_bitmap(sb, bh, block_group, desc); + err = ext4_init_inode_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); + if (err) { + ext4_error(sb, "Failed to init inode bitmap for group " + "%u: %d", block_group, err); + goto out; + } return bh; } ext4_unlock_group(sb, block_group); @@ -182,31 +221,17 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_error(sb, "Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); - return NULL; + return ERR_PTR(-EIO); } verify: - ext4_lock_group(sb, block_group); - if (!buffer_verified(bh) && - !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, - EXT4_INODES_PER_GROUP(sb) / 8)) { - ext4_unlock_group(sb, block_group); - put_bh(bh); - ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " - "inode_bitmap = %llu", block_group, bitmap_blk); - grp = ext4_get_group_info(sb, block_group); - if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { - int count; - count = ext4_free_inodes_count(sb, desc); - percpu_counter_sub(&sbi->s_freeinodes_counter, - count); - } - set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return NULL; - } - ext4_unlock_group(sb, block_group); - set_buffer_verified(bh); + err = ext4_validate_inode_bitmap(sb, desc, block_group, bh); + if (err) + goto out; return bh; +out: + put_bh(bh); + return ERR_PTR(err); } /* @@ -286,8 +311,15 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) bitmap_bh = ext4_read_inode_bitmap(sb, block_group); /* Don't bother if the inode bitmap is corrupt. */ grp = ext4_get_group_info(sb, block_group); - if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh) + if (IS_ERR(bitmap_bh)) { + fatal = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; goto error_return; + } + if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) { + fatal = -EFSCORRUPTED; + goto error_return; + } BUFFER_TRACE(bitmap_bh, "get_write_access"); fatal = ext4_journal_get_write_access(handle, bitmap_bh); @@ -721,16 +753,30 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, struct ext4_group_desc *gdp = NULL; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; - int ret2, err = 0; + int ret2, err; struct inode *ret; ext4_group_t i; ext4_group_t flex_group; struct ext4_group_info *grp; + int encrypt = 0; /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) return ERR_PTR(-EPERM); + if ((ext4_encrypted_inode(dir) || + DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { + err = ext4_get_encryption_info(dir); + if (err) + return ERR_PTR(err); + if (ext4_encryption_info(dir) == NULL) + return ERR_PTR(-EPERM); + if (!handle) + nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb); + encrypt = 1; + } + sb = dir->i_sb; ngroups = ext4_get_groups_count(sb); trace_ext4_request_inode(dir, mode); @@ -755,7 +801,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, inode->i_gid = dir->i_gid; } else inode_init_owner(inode, dir, mode); - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) + goto out; if (!goal) goal = sbi->s_inode_goal; @@ -810,7 +858,9 @@ got_group: brelse(inode_bitmap_bh); inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); /* Skip groups with suspicious inode tables */ - if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) { + if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || + IS_ERR(inode_bitmap_bh)) { + inode_bitmap_bh = NULL; if (++group == ngroups) group = 0; continue; @@ -886,8 +936,8 @@ got: struct buffer_head *block_bitmap_bh; block_bitmap_bh = ext4_read_block_bitmap(sb, group); - if (!block_bitmap_bh) { - err = -EIO; + if (IS_ERR(block_bitmap_bh)) { + err = PTR_ERR(block_bitmap_bh); goto out; } BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); @@ -996,12 +1046,6 @@ got: ei->i_block_group = group; ei->i_last_alloc_group = ~0; - /* If the directory encrypted, then we should encrypt the inode. */ - if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) && - (ext4_encrypted_inode(dir) || - DUMMY_ENCRYPTION_ENABLED(sbi))) - ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); - ext4_set_inode_flags(inode); if (IS_DIRSYNC(inode)) ext4_handle_sync(handle); @@ -1034,28 +1078,9 @@ got: ext4_set_inode_state(inode, EXT4_STATE_NEW); ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; -#ifdef CONFIG_EXT4_FS_ENCRYPTION - if ((sbi->s_file_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID) && - (sbi->s_dir_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID)) { - ei->i_inline_off = 0; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_INLINE_DATA)) - ext4_set_inode_state(inode, - EXT4_STATE_MAY_INLINE_DATA); - } else { - /* Inline data and encryption are incompatible - * We turn off inline data since encryption is enabled */ - ei->i_inline_off = 1; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_INLINE_DATA)) - ext4_clear_inode_state(inode, - EXT4_STATE_MAY_INLINE_DATA); - } -#else ei->i_inline_off = 0; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA)) + if (ext4_has_feature_inline_data(sb)) ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); -#endif ret = inode; err = dquot_alloc_inode(inode); if (err) @@ -1069,7 +1094,7 @@ got: if (err) goto fail_free_drop; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(sb)) { /* set extent flag only for directory, file and normal symlink*/ if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); @@ -1082,6 +1107,12 @@ got: ei->i_datasync_tid = handle->h_transaction->t_tid; } + if (encrypt) { + err = ext4_inherit_context(dir, inode); + if (err) + goto fail_free_drop; + } + err = ext4_mark_inode_dirty(handle, inode); if (err) { ext4_std_error(sb, err); @@ -1119,14 +1150,17 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); + err = -EFSCORRUPTED; goto error; } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = ext4_read_inode_bitmap(sb, block_group); - if (!bitmap_bh) { - ext4_warning(sb, "inode bitmap error for orphan %lu", ino); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + ext4_warning(sb, "inode bitmap error %ld for orphan %lu", + ino, err); goto error; } @@ -1201,8 +1235,10 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) desc_count += ext4_free_inodes_count(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_inode_bitmap(sb, i); - if (!bitmap_bh) + if (IS_ERR(bitmap_bh)) { + bitmap_bh = NULL; continue; + } x = ext4_count_free(bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb) / 8); diff --git a/kernel/fs/ext4/indirect.c b/kernel/fs/ext4/indirect.c index 94ae6874c..355ef9c36 100644 --- a/kernel/fs/ext4/indirect.c +++ b/kernel/fs/ext4/indirect.c @@ -22,6 +22,7 @@ #include "ext4_jbd2.h" #include "truncate.h" +#include <linux/dax.h> #include <linux/uio.h> #include <trace/events/ext4.h> @@ -561,11 +562,10 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, /* * Okay, we need to do block allocation. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(inode->i_sb)) { EXT4_ERROR_INODE(inode, "Can't allocate blocks for " "non-extent mapped inodes with bigalloc"); - return -EUCLEAN; + return -EFSCORRUPTED; } /* Set up for the direct block allocation */ @@ -576,6 +576,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, ar.flags = EXT4_MB_HINT_DATA; if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ar.flags |= EXT4_MB_DELALLOC_RESERVED; + if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) + ar.flags |= EXT4_MB_USE_RESERVED; ar.goal = ext4_find_goal(inode, map->m_lblk, partial); diff --git a/kernel/fs/ext4/inline.c b/kernel/fs/ext4/inline.c index 095c7a258..d884989cc 100644 --- a/kernel/fs/ext4/inline.c +++ b/kernel/fs/ext4/inline.c @@ -434,8 +434,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, memset((void *)ext4_raw_inode(&is.iloc)->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE); - if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(inode->i_sb)) { if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) { ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); @@ -995,20 +994,18 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, * and -EEXIST if directory entry already exists. */ static int ext4_add_dirent_to_inline(handle_t *handle, + struct ext4_filename *fname, struct dentry *dentry, struct inode *inode, struct ext4_iloc *iloc, void *inline_start, int inline_size) { struct inode *dir = d_inode(dentry->d_parent); - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; int err; struct ext4_dir_entry_2 *de; - err = ext4_find_dest_de(dir, inode, iloc->bh, - inline_start, inline_size, - name, namelen, &de); + err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start, + inline_size, fname, &de); if (err) return err; @@ -1016,8 +1013,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, err = ext4_journal_get_write_access(handle, iloc->bh); if (err) return err; - ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name, - name, namelen); + ext4_insert_dentry(dir, inode, de, inline_size, fname); ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); @@ -1248,8 +1244,8 @@ out: * If succeeds, return 0. If not, extended the inline dir and copied data to * the new created block. */ -int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode) +int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, + struct dentry *dentry, struct inode *inode) { int ret, inline_size; void *inline_start; @@ -1268,7 +1264,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; - ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc, + ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc, inline_start, inline_size); if (ret != -ENOSPC) goto out; @@ -1289,8 +1285,9 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, if (inline_size) { inline_start = ext4_get_inline_xattr_pos(dir, &iloc); - ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc, - inline_start, inline_size); + ret = ext4_add_dirent_to_inline(handle, fname, dentry, + inode, &iloc, inline_start, + inline_size); if (ret != -ENOSPC) goto out; @@ -1611,6 +1608,7 @@ out: } struct buffer_head *ext4_find_inline_entry(struct inode *dir, + struct ext4_filename *fname, const struct qstr *d_name, struct ext4_dir_entry_2 **res_dir, int *has_inline_data) @@ -1632,8 +1630,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, inline_start = (void *)ext4_raw_inode(&iloc)->i_block + EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; - ret = search_dir(iloc.bh, inline_start, inline_size, - dir, d_name, 0, res_dir); + ret = ext4_search_dir(iloc.bh, inline_start, inline_size, + dir, fname, d_name, 0, res_dir); if (ret == 1) goto out_find; if (ret < 0) @@ -1645,8 +1643,8 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, inline_start = ext4_get_inline_xattr_pos(dir, &iloc); inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; - ret = search_dir(iloc.bh, inline_start, inline_size, - dir, d_name, 0, res_dir); + ret = ext4_search_dir(iloc.bh, inline_start, inline_size, + dir, fname, d_name, 0, res_dir); if (ret == 1) goto out_find; diff --git a/kernel/fs/ext4/inode.c b/kernel/fs/ext4/inode.c index 966c61482..06bda0361 100644 --- a/kernel/fs/ext4/inode.c +++ b/kernel/fs/ext4/inode.c @@ -22,6 +22,7 @@ #include <linux/time.h> #include <linux/highuid.h> #include <linux/pagemap.h> +#include <linux/dax.h> #include <linux/quotaops.h> #include <linux/string.h> #include <linux/buffer_head.h> @@ -377,7 +378,7 @@ static int __check_block_validity(struct inode *inode, const char *func, "lblock %lu mapped to illegal pblock " "(length %d)", (unsigned long) map->m_lblk, map->m_len); - return -EIO; + return -EFSCORRUPTED; } return 0; } @@ -479,7 +480,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, /* We can handle the block number less than EXT_MAX_BLOCKS */ if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) - return -EIO; + return -EFSCORRUPTED; /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { @@ -656,16 +657,32 @@ has_zeroout: return retval; } -static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) +/* + * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages + * we have to be careful as someone else may be manipulating b_state as well. + */ +static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) { - struct inode *inode = bh->b_assoc_map->host; - /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ - loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; - int err; - if (!uptodate) + unsigned long old_state; + unsigned long new_state; + + flags &= EXT4_MAP_FLAGS; + + /* Dummy buffer_head? Set non-atomically. */ + if (!bh->b_page) { + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; return; - WARN_ON(!buffer_unwritten(bh)); - err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); + } + /* + * Someone else may be modifying b_state. Be careful! This is ugly but + * once we get rid of using bh as a container for mapping information + * to pass to / from get_block functions, this can go away. + */ + do { + old_state = READ_ONCE(bh->b_state); + new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; + } while (unlikely( + cmpxchg(&bh->b_state, old_state, new_state) != old_state)); } /* Maximum number of blocks we map for direct IO at once. */ @@ -704,11 +721,16 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, ext4_io_end_t *io_end = ext4_inode_aio(inode); map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; - if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { + ext4_update_bh_state(bh, map.m_flags); + if (IS_DAX(inode) && buffer_unwritten(bh)) { + /* + * dgc: I suspect unwritten conversion on ext4+DAX is + * fundamentally broken here when there are concurrent + * read/write in progress on this inode. + */ + WARN_ON_ONCE(io_end); bh->b_assoc_map = inode->i_mapping; bh->b_private = (void *)(unsigned long)iblock; - bh->b_end_io = ext4_end_io_unwritten; } if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) set_buffer_defer_completion(bh); @@ -731,18 +753,18 @@ int ext4_get_block(struct inode *inode, sector_t iblock, * `handle' can be NULL if create is zero */ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, - ext4_lblk_t block, int create) + ext4_lblk_t block, int map_flags) { struct ext4_map_blocks map; struct buffer_head *bh; + int create = map_flags & EXT4_GET_BLOCKS_CREATE; int err; J_ASSERT(handle != NULL || create == 0); map.m_lblk = block; map.m_len = 1; - err = ext4_map_blocks(handle, inode, &map, - create ? EXT4_GET_BLOCKS_CREATE : 0); + err = ext4_map_blocks(handle, inode, &map, map_flags); if (err == 0) return create ? ERR_PTR(-ENOSPC) : NULL; @@ -788,11 +810,11 @@ errout: } struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, - ext4_lblk_t block, int create) + ext4_lblk_t block, int map_flags) { struct buffer_head *bh; - bh = ext4_getblk(handle, inode, block, create); + bh = ext4_getblk(handle, inode, block, map_flags); if (IS_ERR(bh)) return bh; if (!bh || buffer_uptodate(bh)) @@ -971,7 +993,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (unlikely(err)) page_zero_new_buffers(page, from, to); else if (decrypt) - err = ext4_decrypt_one(inode, page); + err = ext4_decrypt(page); return err; } #endif @@ -1187,6 +1209,38 @@ errout: return ret ? ret : copied; } +/* + * This is a private version of page_zero_new_buffers() which doesn't + * set the buffer to be dirty, since in data=journalled mode we need + * to call ext4_handle_dirty_metadata() instead. + */ +static void zero_new_buffers(struct page *page, unsigned from, unsigned to) +{ + unsigned int block_start = 0, block_end; + struct buffer_head *head, *bh; + + bh = head = page_buffers(page); + do { + block_end = block_start + bh->b_size; + if (buffer_new(bh)) { + if (block_end > from && block_start < to) { + if (!PageUptodate(page)) { + unsigned start, size; + + start = max(from, block_start); + size = min(to, block_end) - start; + + zero_user(page, start, size); + set_buffer_uptodate(bh); + } + clear_buffer_new(bh); + } + } + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); +} + static int ext4_journalled_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, @@ -1213,7 +1267,7 @@ static int ext4_journalled_write_end(struct file *file, if (copied < len) { if (!PageUptodate(page)) copied = 0; - page_zero_new_buffers(page, from+copied, to); + zero_new_buffers(page, from+copied, to); } ret = ext4_walk_page_buffers(handle, page_buffers(page), from, @@ -1261,13 +1315,12 @@ static int ext4_journalled_write_end(struct file *file, } /* - * Reserve a single cluster located at lblock + * Reserve space for a single cluster */ -static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) +static int ext4_da_reserve_space(struct inode *inode) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - unsigned int md_needed; int ret; /* @@ -1279,25 +1332,14 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) if (ret) return ret; - /* - * recalculate the amount of metadata blocks to reserve - * in order to allocate nrblocks - * worse case is one extent per block - */ spin_lock(&ei->i_block_reservation_lock); - /* - * ext4_calc_metadata_amount() has side effects, which we have - * to be prepared undo if we fail to claim space. - */ - md_needed = 0; - trace_ext4_da_reserve_space(inode, 0); - if (ext4_claim_free_clusters(sbi, 1, 0)) { spin_unlock(&ei->i_block_reservation_lock); dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); return -ENOSPC; } ei->i_reserved_data_blocks++; + trace_ext4_da_reserve_space(inode); spin_unlock(&ei->i_block_reservation_lock); return 0; /* success */ @@ -1575,9 +1617,9 @@ add_delayed: * then we don't need to reserve it again. However we still need * to reserve metadata for every block we're going to write. */ - if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 || + if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 || !ext4_find_delalloc_cluster(inode, map->m_lblk)) { - ret = ext4_da_reserve_space(inode, iblock); + ret = ext4_da_reserve_space(inode); if (ret) { /* not enough space to reserve */ retval = ret; @@ -1655,7 +1697,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return ret; map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + ext4_update_bh_state(bh, map.m_flags); if (buffer_unwritten(bh)) { /* A delayed write to unwritten bh should be marked @@ -1833,11 +1875,22 @@ static int ext4_writepage(struct page *page, * the page. But we may reach here when we do a journal commit via * journal_submit_inode_data_buffers() and in that case we must write * allocated buffers to achieve data=ordered mode guarantees. + * + * Also, if there is only one buffer per page (the fs block + * size == the page size), if one buffer needs block + * allocation or needs to modify the extent tree to clear the + * unwritten flag, we know that the page can't be written at + * all, so we might as well refuse the write immediately. + * Unfortunately if the block size != page size, we can't as + * easily detect this case using ext4_walk_page_buffers(), but + * for the extremely common case, this is an optimization that + * skips a useless round trip through ext4_bio_write_page(). */ if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL, ext4_bh_delay_or_unwritten)) { redirty_page_for_writepage(wbc, page); - if (current->flags & PF_MEMALLOC) { + if ((current->flags & PF_MEMALLOC) || + (inode->i_sb->s_blocksize == PAGE_CACHE_SIZE)) { /* * For memory cleaning there's no point in writing only * some buffers. So just bail out. Warn if we came here @@ -2617,8 +2670,7 @@ static int ext4_nonda_switch(struct super_block *sb) /* We always reserve for an inode update; the superblock could be there too */ static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) { - if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE))) + if (likely(ext4_has_feature_large_file(inode->i_sb))) return 1; if (pos + len <= 0x7fffffffULL) @@ -3039,6 +3091,17 @@ static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, EXT4_GET_BLOCKS_NO_LOCK); } +int ext4_get_block_dax(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT; + if (create) + flags |= EXT4_GET_BLOCKS_CREATE; + ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n", + inode->i_ino, create); + return _ext4_get_block(inode, iblock, bh_result, flags); +} + static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private) { @@ -3351,7 +3414,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, int err = 0; page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, - mapping_gfp_mask(mapping) & ~__GFP_FS); + mapping_gfp_constraint(mapping, ~__GFP_FS)); if (!page) return -ENOMEM; @@ -3400,7 +3463,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, /* We expect the key to be set. */ BUG_ON(!ext4_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_CACHE_SIZE); - WARN_ON_ONCE(ext4_decrypt_one(inode, page)); + WARN_ON_ONCE(ext4_decrypt(page)); } } if (ext4_should_journal_data(inode)) { @@ -3827,7 +3890,7 @@ static int __ext4_get_inode_loc(struct inode *inode, iloc->bh = NULL; if (!ext4_valid_inum(sb, inode->i_ino)) - return -EIO; + return -EFSCORRUPTED; iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); @@ -4013,8 +4076,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, struct inode *inode = &(ei->vfs_inode); struct super_block *sb = inode->i_sb; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { + if (ext4_has_feature_huge_file(sb)) { /* we are using combined 48 bit field */ i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | le32_to_cpu(raw_inode->i_blocks_lo); @@ -4075,7 +4137,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, EXT4_INODE_SIZE(inode->i_sb)); - ret = -EIO; + ret = -EFSCORRUPTED; goto bad_inode; } } else @@ -4095,7 +4157,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { EXT4_ERROR_INODE(inode, "checksum invalid"); - ret = -EIO; + ret = -EFSBADCRC; goto bad_inode; } @@ -4137,7 +4199,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_flags = le32_to_cpu(raw_inode->i_flags); inode->i_blocks = ext4_inode_blocks(raw_inode, ei); ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) + if (ext4_has_feature_64bit(sb)) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); @@ -4210,7 +4272,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", ei->i_file_acl); - ret = -EIO; + ret = -EFSCORRUPTED; goto bad_inode; } else if (!ext4_has_inline_data(inode)) { if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { @@ -4237,8 +4299,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext4_dir_inode_operations; inode->i_fop = &ext4_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (ext4_inode_is_fast_symlink(inode) && - !ext4_encrypted_inode(inode)) { + if (ext4_encrypted_inode(inode)) { + inode->i_op = &ext4_encrypted_symlink_inode_operations; + ext4_set_aops(inode); + } else if (ext4_inode_is_fast_symlink(inode)) { + inode->i_link = (char *)ei->i_data; inode->i_op = &ext4_fast_symlink_inode_operations; nd_terminate_link(ei->i_data, inode->i_size, sizeof(ei->i_data) - 1); @@ -4258,7 +4323,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) } else if (ino == EXT4_BOOT_LOADER_INO) { make_bad_inode(inode); } else { - ret = -EIO; + ret = -EFSCORRUPTED; EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); goto bad_inode; } @@ -4276,7 +4341,7 @@ bad_inode: struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) { if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); return ext4_iget(sb, ino); } @@ -4298,7 +4363,7 @@ static int ext4_inode_blocks_set(handle_t *handle, ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); return 0; } - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) + if (!ext4_has_feature_huge_file(sb)) return -EFBIG; if (i_blocks <= 0xffffffffffffULL) { @@ -4459,8 +4524,7 @@ static int ext4_do_update_inode(handle_t *handle, need_datasync = 1; } if (ei->i_disksize > 0x7fffffffULL) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || + if (!ext4_has_feature_large_file(sb) || EXT4_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT4_GOOD_OLD_REV)) set_large_file = 1; @@ -4509,8 +4573,7 @@ static int ext4_do_update_inode(handle_t *handle, if (err) goto out_brelse; ext4_update_dynamic_rev(sb); - EXT4_SET_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_LARGE_FILE); + ext4_set_feature_large_file(sb); ext4_handle_sync(handle); err = ext4_handle_dirty_super(handle, sb); } @@ -4677,8 +4740,11 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if (is_quota_modification(inode, attr)) - dquot_initialize(inode); + if (is_quota_modification(inode, attr)) { + error = dquot_initialize(inode); + if (error) + return error; + } if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { handle_t *handle; @@ -4707,8 +4773,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_journal_stop(handle); } - if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { + if (attr->ia_valid & ATTR_SIZE) { handle_t *handle; + loff_t oldsize = inode->i_size; + int shrink = (attr->ia_size <= inode->i_size); if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -4716,27 +4784,37 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size > sbi->s_bitmap_maxbytes) return -EFBIG; } + if (!S_ISREG(inode->i_mode)) + return -EINVAL; if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) inode_inc_iversion(inode); - if (S_ISREG(inode->i_mode) && + if (ext4_should_order_data(inode) && (attr->ia_size < inode->i_size)) { - if (ext4_should_order_data(inode)) { - error = ext4_begin_ordered_truncate(inode, + error = ext4_begin_ordered_truncate(inode, attr->ia_size); - if (error) - goto err_out; - } + if (error) + goto err_out; + } + if (attr->ia_size != inode->i_size) { handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); if (IS_ERR(handle)) { error = PTR_ERR(handle); goto err_out; } - if (ext4_handle_valid(handle)) { + if (ext4_handle_valid(handle) && shrink) { error = ext4_orphan_add(handle, inode); orphan = 1; } + /* + * Update c/mtime on truncate up, ext4_truncate() will + * update c/mtime in shrink case below + */ + if (!shrink) { + inode->i_mtime = ext4_current_time(inode); + inode->i_ctime = inode->i_mtime; + } down_write(&EXT4_I(inode)->i_data_sem); EXT4_I(inode)->i_disksize = attr->ia_size; rc = ext4_mark_inode_dirty(handle, inode); @@ -4752,15 +4830,13 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); if (error) { - ext4_orphan_del(NULL, inode); + if (orphan) + ext4_orphan_del(NULL, inode); goto err_out; } - } else { - loff_t oldsize = inode->i_size; - - i_size_write(inode, attr->ia_size); - pagecache_isize_extended(inode, oldsize, inode->i_size); } + if (!shrink) + pagecache_isize_extended(inode, oldsize, inode->i_size); /* * Blocks are going to be removed from the inode. Wait @@ -4780,13 +4856,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) * in data=journal mode to make pages freeable. */ truncate_pagecache(inode, inode->i_size); + if (shrink) + ext4_truncate(inode); } - /* - * We want to call ext4_truncate() even if attr->ia_size == - * inode->i_size for cases like truncation of fallocated space - */ - if (attr->ia_valid & ATTR_SIZE) - ext4_truncate(inode); if (!rc) { setattr_copy(inode, attr); @@ -5239,7 +5311,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) !ext4_should_journal_data(inode) && !ext4_nonda_switch(inode->i_sb)) { do { - ret = __block_page_mkwrite(vma, vmf, + ret = block_page_mkwrite(vma, vmf, ext4_da_get_block_prep); } while (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)); @@ -5286,7 +5358,7 @@ retry_alloc: ret = VM_FAULT_SIGBUS; goto out; } - ret = __block_page_mkwrite(vma, vmf, get_block); + ret = block_page_mkwrite(vma, vmf, get_block); if (!ret && ext4_should_journal_data(inode)) { if (ext4_walk_page_buffers(handle, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { diff --git a/kernel/fs/ext4/ioctl.c b/kernel/fs/ext4/ioctl.c index 2cb9e178d..5e872fd40 100644 --- a/kernel/fs/ext4/ioctl.c +++ b/kernel/fs/ext4/ioctl.c @@ -31,14 +31,11 @@ static void memswap(void *a, void *b, size_t len) { unsigned char *ap, *bp; - unsigned char tmp; ap = (unsigned char *)a; bp = (unsigned char *)b; while (len-- > 0) { - tmp = *ap; - *ap = *bp; - *bp = tmp; + swap(*ap, *bp); ap++; bp++; } @@ -148,8 +145,7 @@ static long swap_inode_boot_loader(struct super_block *sb, inode_bl->i_version = 1; i_size_write(inode_bl, 0); inode_bl->i_mode = S_IFREG; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(sb)) { ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS); ext4_ext_tree_init(handle, inode_bl); } else @@ -386,8 +382,7 @@ setversion_out: goto group_extend_out; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); err = -EOPNOTSUPP; @@ -435,8 +430,7 @@ group_extend_out: goto mext_out; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with bigalloc"); err = -EOPNOTSUPP; @@ -473,8 +467,7 @@ mext_out: goto group_add_out; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); err = -EOPNOTSUPP; @@ -556,8 +549,7 @@ group_add_out: int err = 0, err2 = 0; ext4_group_t o_group = EXT4_SB(sb)->s_groups_count; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not (yet) supported with bigalloc"); return -EOPNOTSUPP; @@ -675,8 +667,8 @@ encryption_policy_out: if (err) return err; } - if (copy_to_user((void *) arg, sbi->s_es->s_encrypt_pw_salt, - 16)) + if (copy_to_user((void __user *) arg, + sbi->s_es->s_encrypt_pw_salt, 16)) return -EFAULT; return 0; } @@ -690,7 +682,7 @@ encryption_policy_out: err = ext4_get_policy(inode, &policy); if (err) return err; - if (copy_to_user((void *)arg, &policy, sizeof(policy))) + if (copy_to_user((void __user *)arg, &policy, sizeof(policy))) return -EFAULT; return 0; #else @@ -758,7 +750,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } case EXT4_IOC_MOVE_EXT: - case FITRIM: case EXT4_IOC_RESIZE_FS: case EXT4_IOC_PRECACHE_EXTENTS: case EXT4_IOC_SET_ENCRYPTION_POLICY: diff --git a/kernel/fs/ext4/mballoc.c b/kernel/fs/ext4/mballoc.c index 41260489d..61eaf74dc 100644 --- a/kernel/fs/ext4/mballoc.c +++ b/kernel/fs/ext4/mballoc.c @@ -26,6 +26,7 @@ #include <linux/log2.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/backing-dev.h> #include <trace/events/ext4.h> #ifdef CONFIG_EXT4_DEBUG @@ -873,8 +874,10 @@ static int ext4_mb_init_cache(struct page *page, char *incore) bh[i] = NULL; continue; } - if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) { - err = -ENOMEM; + bh[i] = ext4_read_block_bitmap_nowait(sb, group); + if (IS_ERR(bh[i])) { + err = PTR_ERR(bh[i]); + bh[i] = NULL; goto out; } mb_debug(1, "read bitmap for group %u\n", group); @@ -882,10 +885,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore) /* wait for I/O completion */ for (i = 0, group = first_group; i < groups_per_page; i++, group++) { - if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) { - err = -EIO; - goto out; - } + int err2; + + if (!bh[i]) + continue; + err2 = ext4_wait_block_bitmap(sb, group, bh[i]); + if (!err) + err = err2; } first_block = page->index * blocks_per_page; @@ -898,6 +904,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore) /* skip initialized uptodate buddy */ continue; + if (!buffer_verified(bh[group - first_group])) + /* Skip faulty bitmaps */ + continue; + err = 0; + /* * data carry information regarding this * particular group in the format specified @@ -2008,7 +2019,12 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, } } -/* This is now called BEFORE we load the buddy bitmap. */ +/* + * This is now called BEFORE we load the buddy bitmap. + * Returns either 1 or 0 indicating that the group is either suitable + * for the allocation or not. In addition it can also return negative + * error code when something goes wrong. + */ static int ext4_mb_good_group(struct ext4_allocation_context *ac, ext4_group_t group, int cr) { @@ -2031,7 +2047,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { int ret = ext4_mb_init_group(ac->ac_sb, group); if (ret) - return 0; + return ret; } fragments = grp->bb_fragments; @@ -2078,7 +2094,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { ext4_group_t ngroups, group, i; int cr; - int err = 0; + int err = 0, first_err = 0; struct ext4_sb_info *sbi; struct super_block *sb; struct ext4_buddy e4b; @@ -2145,6 +2161,7 @@ repeat: group = ac->ac_g_ex.fe_group; for (i = 0; i < ngroups; group++, i++) { + int ret = 0; cond_resched(); /* * Artificially restricted ngroups for non-extent @@ -2154,8 +2171,12 @@ repeat: group = 0; /* This now checks without needing the buddy page */ - if (!ext4_mb_good_group(ac, group, cr)) + ret = ext4_mb_good_group(ac, group, cr); + if (ret <= 0) { + if (!first_err) + first_err = ret; continue; + } err = ext4_mb_load_buddy(sb, group, &e4b); if (err) @@ -2167,9 +2188,12 @@ repeat: * We need to check again after locking the * block group */ - if (!ext4_mb_good_group(ac, group, cr)) { + ret = ext4_mb_good_group(ac, group, cr); + if (ret <= 0) { ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); + if (!first_err) + first_err = ret; continue; } @@ -2216,6 +2240,8 @@ repeat: } } out: + if (!err && ac->ac_status != AC_STATUS_FOUND && first_err) + err = first_err; return err; } @@ -2257,12 +2283,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) group--; if (group == 0) - seq_printf(seq, "#%-5s: %-5s %-5s %-5s " - "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s " - "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", - "group", "free", "frags", "first", - "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6", - "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13"); + seq_puts(seq, "#group: free frags first [" + " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " + " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]"); i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + sizeof(struct ext4_group_info); @@ -2317,7 +2340,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) } -static const struct file_operations ext4_mb_seq_groups_fops = { +const struct file_operations ext4_seq_mb_groups_fops = { .owner = THIS_MODULE, .open = ext4_mb_seq_groups_open, .read = seq_read, @@ -2431,7 +2454,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, kmalloc(sb->s_blocksize, GFP_NOFS); BUG_ON(meta_group_info[i]->bb_bitmap == NULL); bh = ext4_read_block_bitmap(sb, group); - BUG_ON(bh == NULL); + BUG_ON(IS_ERR_OR_NULL(bh)); memcpy(meta_group_info[i]->bb_bitmap, bh->b_data, sb->s_blocksize); put_bh(bh); @@ -2645,10 +2668,6 @@ int ext4_mb_init(struct super_block *sb) if (ret != 0) goto out_free_locality_groups; - if (sbi->s_proc) - proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, - &ext4_mb_seq_groups_fops, sb); - return 0; out_free_locality_groups: @@ -2689,9 +2708,6 @@ int ext4_mb_release(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); - if (sbi->s_proc) - remove_proc_entry("mb_groups", sbi->s_proc); - if (sbi->s_group_info) { for (i = 0; i < ngroups; i++) { grinfo = ext4_get_group_info(sb, i); @@ -2880,10 +2896,12 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, sb = ac->ac_sb; sbi = EXT4_SB(sb); - err = -EIO; bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); - if (!bitmap_bh) + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; goto out_err; + } BUFFER_TRACE(bitmap_bh, "getting write access"); err = ext4_journal_get_write_access(handle, bitmap_bh); @@ -3827,8 +3845,10 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, return 0; bitmap_bh = ext4_read_block_bitmap(sb, group); - if (bitmap_bh == NULL) { - ext4_error(sb, "Error reading block bitmap for %u", group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + ext4_error(sb, "Error %d reading block bitmap for %u", + err, group); return 0; } @@ -3999,9 +4019,10 @@ repeat: } bitmap_bh = ext4_read_block_bitmap(sb, group); - if (bitmap_bh == NULL) { - ext4_error(sb, "Error reading block bitmap for %u", - group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + ext4_error(sb, "Error %d reading block bitmap for %u", + err, group); ext4_mb_unload_buddy(&e4b); continue; } @@ -4666,22 +4687,11 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, ext4_debug("freeing block %llu\n", block); trace_ext4_free_blocks(inode, block, count, flags); - if (flags & EXT4_FREE_BLOCKS_FORGET) { - struct buffer_head *tbh = bh; - int i; - - BUG_ON(bh && (count > 1)); + if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + BUG_ON(count > 1); - for (i = 0; i < count; i++) { - cond_resched(); - if (!bh) - tbh = sb_find_get_block(inode->i_sb, - block + i); - if (!tbh) - continue; - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, - inode, tbh, block + i); - } + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, bh, block); } /* @@ -4726,6 +4736,19 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, count += sbi->s_cluster_ratio - overflow; } + if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) { + int i; + + for (i = 0; i < count; i++) { + cond_resched(); + bh = sb_find_get_block(inode->i_sb, block + i); + if (!bh) + continue; + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, bh, block + i); + } + } + do_more: overflow = 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); @@ -4745,8 +4768,9 @@ do_more: } count_clusters = EXT4_NUM_B2C(sbi, count); bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) { - err = -EIO; + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; goto error_return; } gdp = ext4_get_group_desc(sb, block_group, &gd_bh); @@ -4915,8 +4939,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, } bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) { - err = -EIO; + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; goto error_return; } diff --git a/kernel/fs/ext4/migrate.c b/kernel/fs/ext4/migrate.c index 6163ad21c..a4651894c 100644 --- a/kernel/fs/ext4/migrate.c +++ b/kernel/fs/ext4/migrate.c @@ -448,8 +448,7 @@ int ext4_ext_migrate(struct inode *inode) * If the filesystem does not support extents, or the inode * already is extent-based, error out. */ - if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_INCOMPAT_EXTENTS) || + if (!ext4_has_feature_extents(inode->i_sb) || (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EINVAL; @@ -625,13 +624,11 @@ int ext4_ind_migrate(struct inode *inode) handle_t *handle; int ret; - if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_INCOMPAT_EXTENTS) || + if (!ext4_has_feature_extents(inode->i_sb) || (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EINVAL; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) + if (ext4_has_feature_bigalloc(inode->i_sb)) return -EOPNOTSUPP; /* diff --git a/kernel/fs/ext4/mmp.c b/kernel/fs/ext4/mmp.c index 8313ca332..0a512aa81 100644 --- a/kernel/fs/ext4/mmp.c +++ b/kernel/fs/ext4/mmp.c @@ -69,6 +69,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, ext4_fsblk_t mmp_block) { struct mmp_struct *mmp; + int ret; if (*bh) clear_buffer_uptodate(*bh); @@ -76,33 +77,38 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, /* This would be sb_bread(sb, mmp_block), except we need to be sure * that the MD RAID device cache has been bypassed, and that the read * is not blocked in the elevator. */ - if (!*bh) + if (!*bh) { *bh = sb_getblk(sb, mmp_block); - if (!*bh) - return -ENOMEM; - if (*bh) { - get_bh(*bh); - lock_buffer(*bh); - (*bh)->b_end_io = end_buffer_read_sync; - submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh); - wait_on_buffer(*bh); - if (!buffer_uptodate(*bh)) { - brelse(*bh); - *bh = NULL; + if (!*bh) { + ret = -ENOMEM; + goto warn_exit; } } - if (unlikely(!*bh)) { - ext4_warning(sb, "Error while reading MMP block %llu", - mmp_block); - return -EIO; + + get_bh(*bh); + lock_buffer(*bh); + (*bh)->b_end_io = end_buffer_read_sync; + submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh); + wait_on_buffer(*bh); + if (!buffer_uptodate(*bh)) { + brelse(*bh); + *bh = NULL; + ret = -EIO; + goto warn_exit; } mmp = (struct mmp_struct *)((*bh)->b_data); - if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC || - !ext4_mmp_csum_verify(sb, mmp)) - return -EINVAL; - - return 0; + if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) + ret = -EFSCORRUPTED; + else if (!ext4_mmp_csum_verify(sb, mmp)) + ret = -EFSBADCRC; + else + return 0; + +warn_exit: + ext4_warning(sb, "Error %d while reading MMP block %llu", + ret, mmp_block); + return ret; } /* @@ -111,7 +117,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, const char *function, unsigned int line, const char *msg) { - __ext4_warning(sb, function, line, msg); + __ext4_warning(sb, function, line, "%s", msg); __ext4_warning(sb, function, line, "MMP failure info: last update time: %llu, last update " "node: %s, last update device: %s\n", diff --git a/kernel/fs/ext4/move_extent.c b/kernel/fs/ext4/move_extent.c index 370420bfa..e032a0423 100644 --- a/kernel/fs/ext4/move_extent.c +++ b/kernel/fs/ext4/move_extent.c @@ -166,12 +166,9 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2, */ wait_on_page_writeback(page[0]); wait_on_page_writeback(page[1]); - if (inode1 > inode2) { - struct page *tmp; - tmp = page[0]; - page[0] = page[1]; - page[1] = tmp; - } + if (inode1 > inode2) + swap(page[0], page[1]); + return 0; } @@ -268,11 +265,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int tmp_data_size, data_size, replaced_size; - int err2, jblocks, retries = 0; + int i, err2, jblocks, retries = 0; int replaced_count = 0; int from = data_offset_in_page << orig_inode->i_blkbits; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; struct super_block *sb = orig_inode->i_sb; + struct buffer_head *bh = NULL; /* * It needs twice the amount of ordinary journal buffers because @@ -383,8 +381,16 @@ data_copy: } /* Perform all necessary steps similar write_begin()/write_end() * but keeping in mind that i_size will not change */ - *err = __block_write_begin(pagep[0], from, replaced_size, - ext4_get_block); + if (!page_has_buffers(pagep[0])) + create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0); + bh = page_buffers(pagep[0]); + for (i = 0; i < data_offset_in_page; i++) + bh = bh->b_this_page; + for (i = 0; i < block_len_in_page; i++) { + *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0); + if (*err < 0) + break; + } if (!*err) *err = block_commit_write(pagep[0], from, from + replaced_size); @@ -574,12 +580,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } - /* TODO: This is non obvious task to swap blocks for inodes with full - jornaling enabled */ + + /* TODO: it's not obvious how to swap blocks for inodes with full + journaling enabled */ if (ext4_should_journal_data(orig_inode) || ext4_should_journal_data(donor_inode)) { - return -EINVAL; + ext4_msg(orig_inode->i_sb, KERN_ERR, + "Online defrag not supported with data journaling"); + return -EOPNOTSUPP; } + /* Protect orig and donor inodes against a truncate */ lock_two_nondirectories(orig_inode, donor_inode); diff --git a/kernel/fs/ext4/namei.c b/kernel/fs/ext4/namei.c index 814f3beb4..a969ab39f 100644 --- a/kernel/fs/ext4/namei.c +++ b/kernel/fs/ext4/namei.c @@ -61,7 +61,7 @@ static struct buffer_head *ext4_append(handle_t *handle, *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - bh = ext4_bread(handle, inode, *block, 1); + bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE); if (IS_ERR(bh)) return bh; inode->i_size += inode->i_sb->s_blocksize; @@ -84,12 +84,13 @@ typedef enum { } dirblock_type_t; #define ext4_read_dirblock(inode, block, type) \ - __ext4_read_dirblock((inode), (block), (type), __LINE__) + __ext4_read_dirblock((inode), (block), (type), __func__, __LINE__) static struct buffer_head *__ext4_read_dirblock(struct inode *inode, - ext4_lblk_t block, - dirblock_type_t type, - unsigned int line) + ext4_lblk_t block, + dirblock_type_t type, + const char *func, + unsigned int line) { struct buffer_head *bh; struct ext4_dir_entry *dirent; @@ -97,16 +98,18 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, bh = ext4_bread(NULL, inode, block, 0); if (IS_ERR(bh)) { - __ext4_warning(inode->i_sb, __func__, line, - "error %ld reading directory block " - "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino, - (unsigned long) block); + __ext4_warning(inode->i_sb, func, line, + "inode #%lu: lblock %lu: comm %s: " + "error %ld reading directory block", + inode->i_ino, (unsigned long)block, + current->comm, PTR_ERR(bh)); return bh; } if (!bh) { - ext4_error_inode(inode, __func__, line, block, "Directory hole found"); - return ERR_PTR(-EIO); + ext4_error_inode(inode, func, line, block, + "Directory hole found"); + return ERR_PTR(-EFSCORRUPTED); } dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ @@ -119,9 +122,9 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, is_dx_block = 1; } if (!is_dx_block && type == INDEX) { - ext4_error_inode(inode, __func__, line, block, + ext4_error_inode(inode, func, line, block, "directory leaf block found instead of index block"); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } if (!ext4_has_metadata_csum(inode->i_sb) || buffer_verified(bh)) @@ -136,20 +139,20 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (ext4_dx_csum_verify(inode, dirent)) set_buffer_verified(bh); else { - ext4_error_inode(inode, __func__, line, block, - "Directory index failed checksum"); + ext4_error_inode(inode, func, line, block, + "Directory index failed checksum"); brelse(bh); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSBADCRC); } } if (!is_dx_block) { if (ext4_dirent_csum_verify(inode, dirent)) set_buffer_verified(bh); else { - ext4_error_inode(inode, __func__, line, block, - "Directory block failed checksum"); + ext4_error_inode(inode, func, line, block, + "Directory block failed checksum"); brelse(bh); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSBADCRC); } } return bh; @@ -248,7 +251,7 @@ static void dx_set_count(struct dx_entry *entries, unsigned value); static void dx_set_limit(struct dx_entry *entries, unsigned value); static unsigned dx_root_limit(struct inode *dir, unsigned infosize); static unsigned dx_node_limit(struct inode *dir); -static struct dx_frame *dx_probe(const struct qstr *d_name, +static struct dx_frame *dx_probe(struct ext4_filename *fname, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame); @@ -267,10 +270,10 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frames, __u32 *start_hash); static struct buffer_head * ext4_dx_find_entry(struct inode *dir, - const struct qstr *d_name, + struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir); -static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode); +static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, + struct dentry *dentry, struct inode *inode); /* checksumming functions */ void initialize_dirent_tail(struct ext4_dir_entry_tail *t, @@ -327,10 +330,14 @@ static __le32 ext4_dirent_csum(struct inode *inode, return cpu_to_le32(csum); } -static void warn_no_space_for_csum(struct inode *inode) +#define warn_no_space_for_csum(inode) \ + __warn_no_space_for_csum((inode), __func__, __LINE__) + +static void __warn_no_space_for_csum(struct inode *inode, const char *func, + unsigned int line) { - ext4_warning(inode->i_sb, "no space in directory inode %lu leaf for " - "checksum. Please run e2fsck -D.", inode->i_ino); + __ext4_warning_inode(inode, func, line, + "No space for directory leaf checksum. Please run e2fsck -D."); } int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) @@ -607,17 +614,15 @@ static struct stats dx_show_leaf(struct inode *dir, char *name; struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; - struct ext4_fname_crypto_ctx *ctx = NULL; - int res; + int res = 0; name = de->name; len = de->name_len; - ctx = ext4_get_fname_crypto_ctx(dir, - EXT4_NAME_LEN); - if (IS_ERR(ctx)) { - printk(KERN_WARNING "Error acquiring" - " crypto ctxt--skipping crypto\n"); - ctx = NULL; + if (ext4_encrypted_inode(inode)) + res = ext4_get_encryption_info(dir); + if (res) { + printk(KERN_WARNING "Error setting up" + " fname crypto: %d\n", res); } if (ctx == NULL) { /* Directory is not encrypted */ @@ -637,7 +642,6 @@ static struct stats dx_show_leaf(struct inode *dir, "allocating crypto " "buffer--skipping " "crypto\n"); - ext4_put_fname_crypto_ctx(&ctx); ctx = NULL; } res = ext4_fname_disk_to_usr(ctx, NULL, de, @@ -658,7 +662,6 @@ static struct stats dx_show_leaf(struct inode *dir, printk("%*.s:(E)%x.%u ", len, name, h.hash, (unsigned) ((char *) de - base)); - ext4_put_fname_crypto_ctx(&ctx); ext4_fname_crypto_free_buffer( &fname_crypto_str); } @@ -724,7 +727,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, * back to userspace. */ static struct dx_frame * -dx_probe(const struct qstr *d_name, struct inode *dir, +dx_probe(struct ext4_filename *fname, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in) { unsigned count, indirect; @@ -742,56 +745,41 @@ dx_probe(const struct qstr *d_name, struct inode *dir, if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_LEGACY) { - ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", - root->info.hash_version); + ext4_warning_inode(dir, "Unrecognised inode hash code %u", + root->info.hash_version); goto fail; } + if (fname) + hinfo = &fname->hinfo; hinfo->hash_version = root->info.hash_version; if (hinfo->hash_version <= DX_HASH_TEA) hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; -#ifdef CONFIG_EXT4_FS_ENCRYPTION - if (d_name) { - struct ext4_fname_crypto_ctx *ctx = NULL; - int res; - - /* Check if the directory is encrypted */ - ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); - if (IS_ERR(ctx)) { - ret_err = ERR_PTR(PTR_ERR(ctx)); - goto fail; - } - res = ext4_fname_usr_to_hash(ctx, d_name, hinfo); - if (res < 0) { - ret_err = ERR_PTR(res); - goto fail; - } - ext4_put_fname_crypto_ctx(&ctx); - } -#else - if (d_name) - ext4fs_dirhash(d_name->name, d_name->len, hinfo); -#endif + if (fname && fname_name(fname)) + ext4fs_dirhash(fname_name(fname), fname_len(fname), hinfo); hash = hinfo->hash; if (root->info.unused_flags & 1) { - ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", - root->info.unused_flags); + ext4_warning_inode(dir, "Unimplemented hash flags: %#06x", + root->info.unused_flags); goto fail; } - if ((indirect = root->info.indirect_levels) > 1) { - ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", - root->info.indirect_levels); + indirect = root->info.indirect_levels; + if (indirect > 1) { + ext4_warning_inode(dir, "Unimplemented hash depth: %#06x", + root->info.indirect_levels); goto fail; } - entries = (struct dx_entry *) (((char *)&root->info) + - root->info.info_length); + entries = (struct dx_entry *)(((char *)&root->info) + + root->info.info_length); if (dx_get_limit(entries) != dx_root_limit(dir, root->info.info_length)) { - ext4_warning(dir->i_sb, "dx entry: limit != root limit"); + ext4_warning_inode(dir, "dx entry: limit %u != root limit %u", + dx_get_limit(entries), + dx_root_limit(dir, root->info.info_length)); goto fail; } @@ -799,15 +787,16 @@ dx_probe(const struct qstr *d_name, struct inode *dir, while (1) { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { - ext4_warning(dir->i_sb, - "dx entry: no count or count > limit"); + ext4_warning_inode(dir, + "dx entry: count %u beyond limit %u", + count, dx_get_limit(entries)); goto fail; } p = entries + 1; q = entries + count - 1; while (p <= q) { - m = p + (q - p)/2; + m = p + (q - p) / 2; dxtrace(printk(".")); if (dx_get_hash(m) > hash) q = m - 1; @@ -831,7 +820,8 @@ dx_probe(const struct qstr *d_name, struct inode *dir, } at = p - 1; - dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); + dxtrace(printk(" %x->%u\n", at == entries ? 0 : dx_get_hash(at), + dx_get_block(at))); frame->entries = entries; frame->at = at; if (!indirect--) @@ -845,9 +835,10 @@ dx_probe(const struct qstr *d_name, struct inode *dir, } entries = ((struct dx_node *) frame->bh->b_data)->entries; - if (dx_get_limit(entries) != dx_node_limit (dir)) { - ext4_warning(dir->i_sb, - "dx entry: limit != node limit"); + if (dx_get_limit(entries) != dx_node_limit(dir)) { + ext4_warning_inode(dir, + "dx entry: limit %u != node limit %u", + dx_get_limit(entries), dx_node_limit(dir)); goto fail; } } @@ -858,18 +849,17 @@ fail: } if (ret_err == ERR_PTR(ERR_BAD_DX_DIR)) - ext4_warning(dir->i_sb, - "Corrupt dir inode %lu, running e2fsck is " - "recommended.", dir->i_ino); + ext4_warning_inode(dir, + "Corrupt directory, running e2fsck is recommended"); return ret_err; } -static void dx_release (struct dx_frame *frames) +static void dx_release(struct dx_frame *frames) { if (frames[0].bh == NULL) return; - if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) + if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels) brelse(frames[1].bh); brelse(frames[0].bh); } @@ -962,7 +952,6 @@ static int htree_dirblock_to_tree(struct file *dir_file, struct buffer_head *bh; struct ext4_dir_entry_2 *de, *top; int err = 0, count = 0; - struct ext4_fname_crypto_ctx *ctx = NULL; struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}, tmp_str; dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", @@ -977,17 +966,15 @@ static int htree_dirblock_to_tree(struct file *dir_file, EXT4_DIR_REC_LEN(0)); #ifdef CONFIG_EXT4_FS_ENCRYPTION /* Check if the directory is encrypted */ - ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - brelse(bh); - return err; - } - if (ctx != NULL) { - err = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN, + if (ext4_encrypted_inode(dir)) { + err = ext4_get_encryption_info(dir); + if (err < 0) { + brelse(bh); + return err; + } + err = ext4_fname_crypto_alloc_buffer(dir, EXT4_NAME_LEN, &fname_crypto_str); if (err < 0) { - ext4_put_fname_crypto_ctx(&ctx); brelse(bh); return err; } @@ -1008,16 +995,17 @@ static int htree_dirblock_to_tree(struct file *dir_file, continue; if (de->inode == 0) continue; - if (ctx == NULL) { - /* Directory is not encrypted */ + if (!ext4_encrypted_inode(dir)) { tmp_str.name = de->name; tmp_str.len = de->name_len; err = ext4_htree_store_dirent(dir_file, hinfo->hash, hinfo->minor_hash, de, &tmp_str); } else { + int save_len = fname_crypto_str.len; + /* Directory is encrypted */ - err = ext4_fname_disk_to_usr(ctx, hinfo, de, + err = ext4_fname_disk_to_usr(dir, hinfo, de, &fname_crypto_str); if (err < 0) { count = err; @@ -1026,6 +1014,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, err = ext4_htree_store_dirent(dir_file, hinfo->hash, hinfo->minor_hash, de, &fname_crypto_str); + fname_crypto_str.len = save_len; } if (err != 0) { count = err; @@ -1036,7 +1025,6 @@ static int htree_dirblock_to_tree(struct file *dir_file, errout: brelse(bh); #ifdef CONFIG_EXT4_FS_ENCRYPTION - ext4_put_fname_crypto_ctx(&ctx); ext4_fname_crypto_free_buffer(&fname_crypto_str); #endif return count; @@ -1155,12 +1143,13 @@ errout: static inline int search_dirblock(struct buffer_head *bh, struct inode *dir, + struct ext4_filename *fname, const struct qstr *d_name, unsigned int offset, struct ext4_dir_entry_2 **res_dir) { - return search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir, - d_name, offset, res_dir); + return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir, + fname, d_name, offset, res_dir); } /* @@ -1242,54 +1231,54 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) * `len <= EXT4_NAME_LEN' is guaranteed by caller. * `de != NULL' is guaranteed by caller. */ -static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx, - struct ext4_str *fname_crypto_str, - int len, const char * const name, +static inline int ext4_match(struct ext4_filename *fname, struct ext4_dir_entry_2 *de) { - int res; + const void *name = fname_name(fname); + u32 len = fname_len(fname); if (!de->inode) return 0; #ifdef CONFIG_EXT4_FS_ENCRYPTION - if (ctx) - return ext4_fname_match(ctx, fname_crypto_str, len, name, de); + if (unlikely(!name)) { + if (fname->usr_fname->name[0] == '_') { + int ret; + if (de->name_len < 16) + return 0; + ret = memcmp(de->name + de->name_len - 16, + fname->crypto_buf.name + 8, 16); + return (ret == 0) ? 1 : 0; + } + name = fname->crypto_buf.name; + len = fname->crypto_buf.len; + } #endif - if (len != de->name_len) + if (de->name_len != len) return 0; - res = memcmp(name, de->name, len); - return (res == 0) ? 1 : 0; + return (memcmp(de->name, name, len) == 0) ? 1 : 0; } /* * Returns 0 if not found, -1 on failure, and 1 on success */ -int search_dir(struct buffer_head *bh, char *search_buf, int buf_size, - struct inode *dir, const struct qstr *d_name, - unsigned int offset, struct ext4_dir_entry_2 **res_dir) +int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, + struct inode *dir, struct ext4_filename *fname, + const struct qstr *d_name, + unsigned int offset, struct ext4_dir_entry_2 **res_dir) { struct ext4_dir_entry_2 * de; char * dlimit; int de_len; - const char *name = d_name->name; - int namelen = d_name->len; - struct ext4_fname_crypto_ctx *ctx = NULL; - struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; int res; - ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); - if (IS_ERR(ctx)) - return -1; - de = (struct ext4_dir_entry_2 *)search_buf; dlimit = search_buf + buf_size; while ((char *) de < dlimit) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ if ((char *) de + de->name_len <= dlimit) { - res = ext4_match(ctx, &fname_crypto_str, namelen, - name, de); + res = ext4_match(fname, de); if (res < 0) { res = -1; goto return_result; @@ -1322,8 +1311,6 @@ int search_dir(struct buffer_head *bh, char *search_buf, int buf_size, res = 0; return_result: - ext4_put_fname_crypto_ctx(&ctx); - ext4_fname_crypto_free_buffer(&fname_crypto_str); return res; } @@ -1370,7 +1357,8 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, buffer */ int num = 0; ext4_lblk_t nblocks; - int i, namelen; + int i, namelen, retval; + struct ext4_filename fname; *res_dir = NULL; sb = dir->i_sb; @@ -1378,14 +1366,18 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, if (namelen > EXT4_NAME_LEN) return NULL; + retval = ext4_fname_setup_filename(dir, d_name, 1, &fname); + if (retval) + return ERR_PTR(retval); + if (ext4_has_inline_data(dir)) { int has_inline_data = 1; - ret = ext4_find_inline_entry(dir, d_name, res_dir, + ret = ext4_find_inline_entry(dir, &fname, d_name, res_dir, &has_inline_data); if (has_inline_data) { if (inlined) *inlined = 1; - return ret; + goto cleanup_and_exit; } } @@ -1400,14 +1392,14 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto restart; } if (is_dx(dir)) { - bh = ext4_dx_find_entry(dir, d_name, res_dir); + ret = ext4_dx_find_entry(dir, &fname, res_dir); /* * On success, or if the error was file not found, * return. Otherwise, fall back to doing a search the * old fashioned way. */ - if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR) - return bh; + if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR) + goto cleanup_and_exit; dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " "falling back\n")); } @@ -1437,9 +1429,11 @@ restart: } num++; bh = ext4_getblk(NULL, dir, b++, 0); - if (unlikely(IS_ERR(bh))) { - if (ra_max == 0) - return bh; + if (IS_ERR(bh)) { + if (ra_max == 0) { + ret = bh; + goto cleanup_and_exit; + } break; } bh_use[ra_max] = bh; @@ -1469,7 +1463,7 @@ restart: goto next; } set_buffer_verified(bh); - i = search_dirblock(bh, dir, d_name, + i = search_dirblock(bh, dir, &fname, d_name, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { EXT4_I(dir)->i_dir_start_lookup = block; @@ -1500,15 +1494,17 @@ cleanup_and_exit: /* Clean up the read-ahead blocks */ for (; ra_ptr < ra_max; ra_ptr++) brelse(bh_use[ra_ptr]); + ext4_fname_free_filename(&fname); return ret; } -static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir) +static struct buffer_head * ext4_dx_find_entry(struct inode *dir, + struct ext4_filename *fname, + struct ext4_dir_entry_2 **res_dir) { struct super_block * sb = dir->i_sb; - struct dx_hash_info hinfo; struct dx_frame frames[2], *frame; + const struct qstr *d_name = fname->usr_fname; struct buffer_head *bh; ext4_lblk_t block; int retval; @@ -1516,7 +1512,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q #ifdef CONFIG_EXT4_FS_ENCRYPTION *res_dir = NULL; #endif - frame = dx_probe(d_name, dir, &hinfo, frames); + frame = dx_probe(fname, dir, NULL, frames); if (IS_ERR(frame)) return (struct buffer_head *) frame; do { @@ -1525,7 +1521,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q if (IS_ERR(bh)) goto errout; - retval = search_dirblock(bh, dir, d_name, + retval = search_dirblock(bh, dir, fname, d_name, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (retval == 1) @@ -1537,12 +1533,12 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q } /* Check to see if we should continue to search */ - retval = ext4_htree_next_block(dir, hinfo.hash, frame, + retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame, frames, NULL); if (retval < 0) { - ext4_warning(sb, - "error %d reading index page in directory #%lu", - retval, dir->i_ino); + ext4_warning_inode(dir, + "error %d reading directory index block", + retval); bh = ERR_PTR(retval); goto errout; } @@ -1574,19 +1570,19 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi brelse(bh); if (!ext4_valid_inum(dir->i_sb, ino)) { EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } if (unlikely(ino == dir->i_ino)) { EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir", dentry); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } inode = ext4_iget_normal(dir->i_sb, ino); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, "deleted inode referenced: %u", ino); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || @@ -1623,7 +1619,7 @@ struct dentry *ext4_get_parent(struct dentry *child) if (!ext4_valid_inum(d_inode(child)->i_sb, ino)) { EXT4_ERROR_INODE(d_inode(child), "bad parent inode number: %u", ino); - return ERR_PTR(-EIO); + return ERR_PTR(-EFSCORRUPTED); } return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino)); @@ -1796,42 +1792,26 @@ journal_error: int ext4_find_dest_de(struct inode *dir, struct inode *inode, struct buffer_head *bh, void *buf, int buf_size, - const char *name, int namelen, + struct ext4_filename *fname, struct ext4_dir_entry_2 **dest_de) { struct ext4_dir_entry_2 *de; - unsigned short reclen = EXT4_DIR_REC_LEN(namelen); + unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname)); int nlen, rlen; unsigned int offset = 0; char *top; - struct ext4_fname_crypto_ctx *ctx = NULL; - struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; int res; - ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); - if (IS_ERR(ctx)) - return -1; - - if (ctx != NULL) { - /* Calculate record length needed to store the entry */ - res = ext4_fname_crypto_namelen_on_disk(ctx, namelen); - if (res < 0) { - ext4_put_fname_crypto_ctx(&ctx); - return res; - } - reclen = EXT4_DIR_REC_LEN(res); - } - de = (struct ext4_dir_entry_2 *)buf; top = buf + buf_size - reclen; while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, buf, buf_size, offset)) { - res = -EIO; + res = -EFSCORRUPTED; goto return_result; } /* Provide crypto context and crypto buffer to ext4 match */ - res = ext4_match(ctx, &fname_crypto_str, namelen, name, de); + res = ext4_match(fname, de); if (res < 0) goto return_result; if (res > 0) { @@ -1853,8 +1833,6 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, res = 0; } return_result: - ext4_put_fname_crypto_ctx(&ctx); - ext4_fname_crypto_free_buffer(&fname_crypto_str); return res; } @@ -1862,39 +1840,10 @@ int ext4_insert_dentry(struct inode *dir, struct inode *inode, struct ext4_dir_entry_2 *de, int buf_size, - const struct qstr *iname, - const char *name, int namelen) + struct ext4_filename *fname) { int nlen, rlen; - struct ext4_fname_crypto_ctx *ctx = NULL; - struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; - struct ext4_str tmp_str; - int res; - - ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); - if (IS_ERR(ctx)) - return -EIO; - /* By default, the input name would be written to the disk */ - tmp_str.name = (unsigned char *)name; - tmp_str.len = namelen; - if (ctx != NULL) { - /* Directory is encrypted */ - res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN, - &fname_crypto_str); - if (res < 0) { - ext4_put_fname_crypto_ctx(&ctx); - return -ENOMEM; - } - res = ext4_fname_usr_to_disk(ctx, iname, &fname_crypto_str); - if (res < 0) { - ext4_put_fname_crypto_ctx(&ctx); - ext4_fname_crypto_free_buffer(&fname_crypto_str); - return res; - } - tmp_str.name = fname_crypto_str.name; - tmp_str.len = fname_crypto_str.len; - } nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); @@ -1908,11 +1857,8 @@ int ext4_insert_dentry(struct inode *dir, de->file_type = EXT4_FT_UNKNOWN; de->inode = cpu_to_le32(inode->i_ino); ext4_set_de_type(inode->i_sb, de, inode->i_mode); - de->name_len = tmp_str.len; - - memcpy(de->name, tmp_str.name, tmp_str.len); - ext4_put_fname_crypto_ctx(&ctx); - ext4_fname_crypto_free_buffer(&fname_crypto_str); + de->name_len = fname_len(fname); + memcpy(de->name, fname_name(fname), fname_len(fname)); return 0; } @@ -1924,13 +1870,11 @@ int ext4_insert_dentry(struct inode *dir, * space. It will return -ENOSPC if no space is available, and -EIO * and -EEXIST if directory entry already exists. */ -static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, +static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, + struct inode *dir, struct inode *inode, struct ext4_dir_entry_2 *de, struct buffer_head *bh) { - struct inode *dir = d_inode(dentry->d_parent); - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; unsigned int blocksize = dir->i_sb->s_blocksize; int csum_size = 0; int err; @@ -1939,9 +1883,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, csum_size = sizeof(struct ext4_dir_entry_tail); if (!de) { - err = ext4_find_dest_de(dir, inode, - bh, bh->b_data, blocksize - csum_size, - name, namelen, &de); + err = ext4_find_dest_de(dir, inode, bh, bh->b_data, + blocksize - csum_size, fname, &de); if (err) return err; } @@ -1954,8 +1897,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, /* By now the buffer is marked for journaling. Due to crypto operations, * the following function call may fail */ - err = ext4_insert_dentry(dir, inode, de, blocksize, &dentry->d_name, - name, namelen); + err = ext4_insert_dentry(dir, inode, de, blocksize, fname); if (err < 0) return err; @@ -1985,17 +1927,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, * This converts a one block unindexed directory to a 3 block indexed * directory, and adds the dentry to the indexed directory. */ -static int make_indexed_dir(handle_t *handle, struct dentry *dentry, +static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, + struct dentry *dentry, struct inode *inode, struct buffer_head *bh) { struct inode *dir = d_inode(dentry->d_parent); -#ifdef CONFIG_EXT4_FS_ENCRYPTION - struct ext4_fname_crypto_ctx *ctx = NULL; - int res; -#else - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; -#endif struct buffer_head *bh2; struct dx_root *root; struct dx_frame frames[2], *frame; @@ -2006,17 +1942,10 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, unsigned len; int retval; unsigned blocksize; - struct dx_hash_info hinfo; ext4_lblk_t block; struct fake_dirent *fde; int csum_size = 0; -#ifdef CONFIG_EXT4_FS_ENCRYPTION - ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); -#endif - if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); @@ -2038,7 +1967,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, if ((char *) de >= (((char *) root) + blocksize)) { EXT4_ERROR_INODE(dir, "invalid rec_len for '..'"); brelse(bh); - return -EIO; + return -EFSCORRUPTED; } len = ((char *) root) + (blocksize - csum_size) - (char *) de; @@ -2078,22 +2007,12 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info))); /* Initialize as for dx_probe */ - hinfo.hash_version = root->info.hash_version; - if (hinfo.hash_version <= DX_HASH_TEA) - hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; - hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; -#ifdef CONFIG_EXT4_FS_ENCRYPTION - res = ext4_fname_usr_to_hash(ctx, &dentry->d_name, &hinfo); - if (res < 0) { - ext4_put_fname_crypto_ctx(&ctx); - ext4_mark_inode_dirty(handle, dir); - brelse(bh); - return res; - } - ext4_put_fname_crypto_ctx(&ctx); -#else - ext4fs_dirhash(name, namelen, &hinfo); -#endif + fname->hinfo.hash_version = root->info.hash_version; + if (fname->hinfo.hash_version <= DX_HASH_TEA) + fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; + fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; + ext4fs_dirhash(fname_name(fname), fname_len(fname), &fname->hinfo); + memset(frames, 0, sizeof(frames)); frame = frames; frame->entries = entries; @@ -2108,14 +2027,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, if (retval) goto out_frames; - de = do_split(handle,dir, &bh, frame, &hinfo); + de = do_split(handle,dir, &bh, frame, &fname->hinfo); if (IS_ERR(de)) { retval = PTR_ERR(de); goto out_frames; } dx_release(frames); - retval = add_dirent_to_buf(handle, dentry, inode, de, bh); + retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh); brelse(bh); return retval; out_frames: @@ -2147,6 +2066,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct ext4_dir_entry_2 *de; struct ext4_dir_entry_tail *t; struct super_block *sb; + struct ext4_filename fname; int retval; int dx_fallback=0; unsigned blocksize; @@ -2161,10 +2081,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if (!dentry->d_name.len) return -EINVAL; + retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname); + if (retval) + return retval; + if (ext4_has_inline_data(dir)) { - retval = ext4_try_add_inline_entry(handle, dentry, inode); + retval = ext4_try_add_inline_entry(handle, &fname, + dentry, inode); if (retval < 0) - return retval; + goto out; if (retval == 1) { retval = 0; goto out; @@ -2172,7 +2097,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, } if (is_dx(dir)) { - retval = ext4_dx_add_entry(handle, dentry, inode); + retval = ext4_dx_add_entry(handle, &fname, dentry, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); @@ -2182,24 +2107,31 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { bh = ext4_read_dirblock(dir, block, DIRENT); - if (IS_ERR(bh)) - return PTR_ERR(bh); - - retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); + if (IS_ERR(bh)) { + retval = PTR_ERR(bh); + bh = NULL; + goto out; + } + retval = add_dirent_to_buf(handle, &fname, dir, inode, + NULL, bh); if (retval != -ENOSPC) goto out; if (blocks == 1 && !dx_fallback && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { - retval = make_indexed_dir(handle, dentry, inode, bh); + ext4_has_feature_dir_index(sb)) { + retval = make_indexed_dir(handle, &fname, dentry, + inode, bh); bh = NULL; /* make_indexed_dir releases bh */ goto out; } brelse(bh); } bh = ext4_append(handle, dir, &block); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + retval = PTR_ERR(bh); + bh = NULL; + goto out; + } de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); @@ -2209,8 +2141,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, initialize_dirent_tail(t, blocksize); } - retval = add_dirent_to_buf(handle, dentry, inode, de, bh); + retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh); out: + ext4_fname_free_filename(&fname); brelse(bh); if (retval == 0) ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); @@ -2220,19 +2153,18 @@ out: /* * Returns 0 for success, or a negative error value */ -static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode) +static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, + struct dentry *dentry, struct inode *inode) { struct dx_frame frames[2], *frame; struct dx_entry *entries, *at; - struct dx_hash_info hinfo; struct buffer_head *bh; struct inode *dir = d_inode(dentry->d_parent); struct super_block *sb = dir->i_sb; struct ext4_dir_entry_2 *de; int err; - frame = dx_probe(&dentry->d_name, dir, &hinfo, frames); + frame = dx_probe(fname, dir, NULL, frames); if (IS_ERR(frame)) return PTR_ERR(frame); entries = frame->entries; @@ -2249,7 +2181,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (err) goto journal_error; - err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); + err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh); if (err != -ENOSPC) goto cleanup; @@ -2267,7 +2199,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels && (dx_get_count(frames->entries) == dx_get_limit(frames->entries))) { - ext4_warning(sb, "Directory index full!"); + ext4_warning_inode(dir, "Directory index full!"); err = -ENOSPC; goto cleanup; } @@ -2345,12 +2277,12 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, goto cleanup; } } - de = do_split(handle, dir, &bh, frame, &hinfo); + de = do_split(handle, dir, &bh, frame, &fname->hinfo); if (IS_ERR(de)) { err = PTR_ERR(de); goto cleanup; } - err = add_dirent_to_buf(handle, dentry, inode, de, bh); + err = add_dirent_to_buf(handle, fname, dir, inode, de, bh); goto cleanup; journal_error: @@ -2383,7 +2315,7 @@ int ext4_generic_delete_entry(handle_t *handle, while (i < buf_size - csum_size) { if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, bh->b_size, i)) - return -EIO; + return -EFSCORRUPTED; if (de == de_del) { if (pde) pde->rec_len = ext4_rec_len_to_disk( @@ -2456,8 +2388,7 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode) /* limit is 16-bit i_links_count */ if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { set_nlink(inode, 1); - EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_DIR_NLINK); + ext4_set_feature_dir_nlink(inode->i_sb); } } } @@ -2504,7 +2435,9 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct inode *inode; int err, credits, retries = 0; - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + return err; credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); @@ -2517,20 +2450,7 @@ retry: inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); - err = 0; -#ifdef CONFIG_EXT4_FS_ENCRYPTION - if (!err && (ext4_encrypted_inode(dir) || - DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)))) { - err = ext4_inherit_context(dir, inode); - if (err) { - clear_nlink(inode); - unlock_new_inode(inode); - iput(inode); - } - } -#endif - if (!err) - err = ext4_add_nondir(handle, dentry, inode); + err = ext4_add_nondir(handle, dentry, inode); if (!err && IS_DIRSYNC(dir)) ext4_handle_sync(handle); } @@ -2548,10 +2468,9 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err, credits, retries = 0; - if (!new_valid_dev(rdev)) - return -EINVAL; - - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + return err; credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); @@ -2580,7 +2499,9 @@ static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err, retries = 0; - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + return err; retry: inode = ext4_new_inode_start_handle(dir, mode, @@ -2693,7 +2614,9 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + return err; credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); @@ -2711,14 +2634,6 @@ retry: err = ext4_init_new_dir(handle, dir, inode); if (err) goto out_clear_inode; -#ifdef CONFIG_EXT4_FS_ENCRYPTION - if (ext4_encrypted_inode(dir) || - DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) { - err = ext4_inherit_context(dir, inode); - if (err) - goto out_clear_inode; - } -#endif err = ext4_mark_inode_dirty(handle, inode); if (!err) err = ext4_add_entry(handle, dentry, inode); @@ -2779,12 +2694,9 @@ int ext4_empty_dir(struct inode *inode) de = (struct ext4_dir_entry_2 *) bh->b_data; de1 = ext4_next_entry(de, sb->s_blocksize); if (le32_to_cpu(de->inode) != inode->i_ino || - !le32_to_cpu(de1->inode) || - strcmp(".", de->name) || - strcmp("..", de1->name)) { - ext4_warning(inode->i_sb, - "bad directory (dir #%lu) - no `.' or `..'", - inode->i_ino); + le32_to_cpu(de1->inode) == 0 || + strcmp(".", de->name) || strcmp("..", de1->name)) { + ext4_warning_inode(inode, "directory missing '.' and/or '..'"); brelse(bh); return 1; } @@ -3002,8 +2914,12 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ - dquot_initialize(dir); - dquot_initialize(d_inode(dentry)); + retval = dquot_initialize(dir); + if (retval) + return retval; + retval = dquot_initialize(d_inode(dentry)); + if (retval) + return retval; retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); @@ -3014,7 +2930,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) inode = d_inode(dentry); - retval = -EIO; + retval = -EFSCORRUPTED; if (le32_to_cpu(de->inode) != inode->i_ino) goto end_rmdir; @@ -3037,8 +2953,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) if (retval) goto end_rmdir; if (!EXT4_DIR_LINK_EMPTY(inode)) - ext4_warning(inode->i_sb, - "empty directory has too many links (%d)", + ext4_warning_inode(inode, + "empty directory '%.*s' has too many links (%u)", + dentry->d_name.len, dentry->d_name.name, inode->i_nlink); inode->i_version++; clear_nlink(inode); @@ -3071,8 +2988,12 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) trace_ext4_unlink_enter(dir, dentry); /* Initialize quotas before so that eventual writes go * in separate transaction */ - dquot_initialize(dir); - dquot_initialize(d_inode(dentry)); + retval = dquot_initialize(dir); + if (retval) + return retval; + retval = dquot_initialize(d_inode(dentry)); + if (retval) + return retval; retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); @@ -3083,7 +3004,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) inode = d_inode(dentry); - retval = -EIO; + retval = -EFSCORRUPTED; if (le32_to_cpu(de->inode) != inode->i_ino) goto end_unlink; @@ -3098,10 +3019,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - if (!inode->i_nlink) { - ext4_warning(inode->i_sb, - "Deleting nonexistent file (%lu), %d", - inode->i_ino, inode->i_nlink); + if (inode->i_nlink == 0) { + ext4_warning_inode(inode, "Deleting file '%.*s' with no links", + dentry->d_name.len, dentry->d_name.name); set_nlink(inode, 1); } retval = ext4_delete_entry(handle, dir, de, bh); @@ -3140,12 +3060,27 @@ static int ext4_symlink(struct inode *dir, encryption_required = (ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))); - if (encryption_required) - disk_link.len = encrypted_symlink_data_len(len) + 1; - if (disk_link.len > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; + if (encryption_required) { + err = ext4_get_encryption_info(dir); + if (err) + return err; + if (ext4_encryption_info(dir) == NULL) + return -EPERM; + disk_link.len = (ext4_fname_encrypted_size(dir, len) + + sizeof(struct ext4_encrypted_symlink_data)); + sd = kzalloc(disk_link.len, GFP_KERNEL); + if (!sd) + return -ENOMEM; + } + + if (disk_link.len > dir->i_sb->s_blocksize) { + err = -ENAMETOOLONG; + goto err_free_sd; + } - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + goto err_free_sd; if ((disk_link.len > EXT4_N_BLOCKS * 4)) { /* @@ -3174,42 +3109,29 @@ static int ext4_symlink(struct inode *dir, if (IS_ERR(inode)) { if (handle) ext4_journal_stop(handle); - return PTR_ERR(inode); + err = PTR_ERR(inode); + goto err_free_sd; } if (encryption_required) { - struct ext4_fname_crypto_ctx *ctx = NULL; struct qstr istr; struct ext4_str ostr; - sd = kzalloc(disk_link.len, GFP_NOFS); - if (!sd) { - err = -ENOMEM; - goto err_drop_inode; - } - err = ext4_inherit_context(dir, inode); - if (err) - goto err_drop_inode; - ctx = ext4_get_fname_crypto_ctx(inode, - inode->i_sb->s_blocksize); - if (IS_ERR_OR_NULL(ctx)) { - /* We just set the policy, so ctx should not be NULL */ - err = (ctx == NULL) ? -EIO : PTR_ERR(ctx); - goto err_drop_inode; - } istr.name = (const unsigned char *) symname; istr.len = len; ostr.name = sd->encrypted_path; - err = ext4_fname_usr_to_disk(ctx, &istr, &ostr); - ext4_put_fname_crypto_ctx(&ctx); + ostr.len = disk_link.len; + err = ext4_fname_usr_to_disk(inode, &istr, &ostr); if (err < 0) goto err_drop_inode; sd->len = cpu_to_le16(ostr.len); disk_link.name = (char *) sd; + inode->i_op = &ext4_encrypted_symlink_inode_operations; } if ((disk_link.len > EXT4_N_BLOCKS * 4)) { - inode->i_op = &ext4_symlink_inode_operations; + if (!encryption_required) + inode->i_op = &ext4_symlink_inode_operations; ext4_set_aops(inode); /* * We cannot call page_symlink() with transaction started @@ -3249,9 +3171,10 @@ static int ext4_symlink(struct inode *dir, } else { /* clear the extent format for fast symlink */ ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); - inode->i_op = encryption_required ? - &ext4_symlink_inode_operations : - &ext4_fast_symlink_inode_operations; + if (!encryption_required) { + inode->i_op = &ext4_fast_symlink_inode_operations; + inode->i_link = (char *)&EXT4_I(inode)->i_data; + } memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name, disk_link.len); inode->i_size = disk_link.len - 1; @@ -3268,10 +3191,11 @@ static int ext4_symlink(struct inode *dir, err_drop_inode: if (handle) ext4_journal_stop(handle); - kfree(sd); clear_nlink(inode); unlock_new_inode(inode); iput(inode); +err_free_sd: + kfree(sd); return err; } @@ -3287,7 +3211,9 @@ static int ext4_link(struct dentry *old_dentry, if (ext4_encrypted_inode(dir) && !ext4_is_child_context_consistent_with_parent(dir, inode)) return -EPERM; - dquot_initialize(dir); + err = dquot_initialize(dir); + if (err) + return err; retry: handle = ext4_journal_start(dir, EXT4_HT_DIR, @@ -3380,7 +3306,7 @@ static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent) if (!ent->dir_bh) return retval; if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino) - return -EIO; + return -EFSCORRUPTED; BUFFER_TRACE(ent->dir_bh, "get_write_access"); return ext4_journal_get_write_access(handle, ent->dir_bh); } @@ -3422,8 +3348,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, if (retval) return retval; ent->de->inode = cpu_to_le32(ino); - if (EXT4_HAS_INCOMPAT_FEATURE(ent->dir->i_sb, - EXT4_FEATURE_INCOMPAT_FILETYPE)) + if (ext4_has_feature_filetype(ent->dir->i_sb)) ent->de->file_type = file_type; ent->dir->i_version++; ent->dir->i_ctime = ent->dir->i_mtime = @@ -3487,9 +3412,9 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent, } if (retval) { - ext4_warning(ent->dir->i_sb, - "Deleting old file (%lu), %d, error=%d", - ent->dir->i_ino, ent->dir->i_nlink, retval); + ext4_warning_inode(ent->dir, + "Deleting old file: nlink %d, error=%d", + ent->dir->i_nlink, retval); } } @@ -3566,13 +3491,20 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, int credits; u8 old_file_type; - dquot_initialize(old.dir); - dquot_initialize(new.dir); + retval = dquot_initialize(old.dir); + if (retval) + return retval; + retval = dquot_initialize(new.dir); + if (retval) + return retval; /* Initialize quotas before so that eventual writes go * in separate transaction */ - if (new.inode) - dquot_initialize(new.inode); + if (new.inode) { + retval = dquot_initialize(new.inode); + if (retval) + return retval; + } old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); if (IS_ERR(old.bh)) @@ -3759,8 +3691,21 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, u8 new_file_type; int retval; - dquot_initialize(old.dir); - dquot_initialize(new.dir); + if ((ext4_encrypted_inode(old_dir) || + ext4_encrypted_inode(new_dir)) && + (old_dir != new_dir) && + (!ext4_is_child_context_consistent_with_parent(new_dir, + old.inode) || + !ext4_is_child_context_consistent_with_parent(old_dir, + new.inode))) + return -EPERM; + + retval = dquot_initialize(old.dir); + if (retval) + return retval; + retval = dquot_initialize(new.dir); + if (retval) + return retval; old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, &old.inlined); diff --git a/kernel/fs/ext4/page-io.c b/kernel/fs/ext4/page-io.c index 5765f88b3..17fbe3882 100644 --- a/kernel/fs/ext4/page-io.c +++ b/kernel/fs/ext4/page-io.c @@ -61,7 +61,6 @@ static void buffer_io_error(struct buffer_head *bh) static void ext4_finish_bio(struct bio *bio) { int i; - int error = !test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec; bio_for_each_segment_all(bvec, bio, i) { @@ -84,11 +83,11 @@ static void ext4_finish_bio(struct bio *bio) /* The bounce data pages are unmapped. */ data_page = page; ctx = (struct ext4_crypto_ctx *)page_private(data_page); - page = ctx->control_page; + page = ctx->w.control_page; } #endif - if (error) { + if (bio->bi_error) { SetPageError(page); set_bit(AS_EIO, &page->mapping->flags); } @@ -107,7 +106,7 @@ static void ext4_finish_bio(struct bio *bio) continue; } clear_buffer_async_write(bh); - if (error) + if (bio->bi_error) buffer_io_error(bh); } while ((bh = bh->b_this_page) != head); bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); @@ -310,27 +309,25 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) } /* BIO completion function for page writeback */ -static void ext4_end_bio(struct bio *bio, int error) +static void ext4_end_bio(struct bio *bio) { ext4_io_end_t *io_end = bio->bi_private; sector_t bi_sector = bio->bi_iter.bi_sector; BUG_ON(!io_end); bio->bi_end_io = NULL; - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = 0; - if (error) { + if (bio->bi_error) { struct inode *inode = io_end->inode; ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " "(offset %llu size %ld starting block %llu)", - error, inode->i_ino, + bio->bi_error, inode->i_ino, (unsigned long long) io_end->offset, (long) io_end->size, (unsigned long long) bi_sector >> (inode->i_blkbits - 9)); - mapping_set_error(inode->i_mapping, error); + mapping_set_error(inode->i_mapping, bio->bi_error); } if (io_end->flag & EXT4_IO_END_UNWRITTEN) { @@ -357,9 +354,10 @@ void ext4_io_submit(struct ext4_io_submit *io) struct bio *bio = io->io_bio; if (bio) { + int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC : WRITE; bio_get(io->io_bio); - submit_bio(io->io_op, io->io_bio); - BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); + submit_bio(io_op, io->io_bio); bio_put(io->io_bio); } io->io_bio = NULL; @@ -368,7 +366,7 @@ void ext4_io_submit(struct ext4_io_submit *io) void ext4_io_submit_init(struct ext4_io_submit *io, struct writeback_control *wbc) { - io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + io->io_wbc = wbc; io->io_bio = NULL; io->io_end = NULL; } @@ -376,12 +374,12 @@ void ext4_io_submit_init(struct ext4_io_submit *io, static int io_submit_init_bio(struct ext4_io_submit *io, struct buffer_head *bh) { - int nvecs = bio_get_nr_vecs(bh->b_bdev); struct bio *bio; - bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); + bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); if (!bio) return -ENOMEM; + wbc_init_bio(io->io_wbc, bio); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_bdev = bh->b_bdev; bio->bi_end_io = ext4_end_bio; @@ -410,6 +408,7 @@ submit_and_retry: ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); if (ret != bh->b_size) goto submit_and_retry; + wbc_account_io(io->io_wbc, page, bh->b_size); io->io_next_block++; return 0; } @@ -426,6 +425,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct buffer_head *bh, *head; int ret = 0; int nr_submitted = 0; + int nr_to_submit = 0; blocksize = 1 << inode->i_blkbits; @@ -478,11 +478,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); } set_buffer_async_write(bh); + nr_to_submit++; } while ((bh = bh->b_this_page) != head); bh = head = page_buffers(page); - if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) && + nr_to_submit) { data_page = ext4_encrypt(inode, page); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); diff --git a/kernel/fs/ext4/readpage.c b/kernel/fs/ext4/readpage.c index 171b9ac4b..5dc5e9506 100644 --- a/kernel/fs/ext4/readpage.c +++ b/kernel/fs/ext4/readpage.c @@ -54,15 +54,15 @@ static void completion_pages(struct work_struct *work) { #ifdef CONFIG_EXT4_FS_ENCRYPTION struct ext4_crypto_ctx *ctx = - container_of(work, struct ext4_crypto_ctx, work); - struct bio *bio = ctx->bio; + container_of(work, struct ext4_crypto_ctx, r.work); + struct bio *bio = ctx->r.bio; struct bio_vec *bv; int i; bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - int ret = ext4_decrypt(ctx, page); + int ret = ext4_decrypt(page); if (ret) { WARN_ON_ONCE(1); SetPageError(page); @@ -98,7 +98,7 @@ static inline bool ext4_bio_encrypted(struct bio *bio) * status of that page is hard. See end_buffer_async_read() for the details. * There is no point in duplicating all that complexity. */ -static void mpage_end_io(struct bio *bio, int err) +static void mpage_end_io(struct bio *bio) { struct bio_vec *bv; int i; @@ -106,19 +106,19 @@ static void mpage_end_io(struct bio *bio, int err) if (ext4_bio_encrypted(bio)) { struct ext4_crypto_ctx *ctx = bio->bi_private; - if (err) { + if (bio->bi_error) { ext4_release_crypto_ctx(ctx); } else { - INIT_WORK(&ctx->work, completion_pages); - ctx->bio = bio; - queue_work(ext4_read_workqueue, &ctx->work); + INIT_WORK(&ctx->r.work, completion_pages); + ctx->r.bio = bio; + queue_work(ext4_read_workqueue, &ctx->r.work); return; } } bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - if (!err) { + if (!bio->bi_error) { SetPageUptodate(page); } else { ClearPageUptodate(page); @@ -165,8 +165,8 @@ int ext4_mpage_readpages(struct address_space *mapping, if (pages) { page = list_entry(pages->prev, struct page, lru); list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, - page->index, GFP_KERNEL)) + if (add_to_page_cache_lru(page, mapping, page->index, + mapping_gfp_constraint(mapping, GFP_KERNEL))) goto next_page; } @@ -284,7 +284,7 @@ int ext4_mpage_readpages(struct address_space *mapping, goto set_error_page; } bio = bio_alloc(GFP_KERNEL, - min_t(int, nr_pages, bio_get_nr_vecs(bdev))); + min_t(int, nr_pages, BIO_MAX_PAGES)); if (!bio) { if (ctx) ext4_release_crypto_ctx(ctx); diff --git a/kernel/fs/ext4/resize.c b/kernel/fs/ext4/resize.c index cf0c47204..34038e359 100644 --- a/kernel/fs/ext4/resize.c +++ b/kernel/fs/ext4/resize.c @@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) + if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) goto out2; flex_gd->count = flexbg_size; @@ -490,7 +490,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, group_data[0].group != sbi->s_groups_count); reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); - meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + meta_bg = ext4_has_feature_meta_bg(sb); /* This transaction may be extended/restarted along the way */ handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA); @@ -680,8 +680,7 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, int mult = 3; unsigned ret; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + if (!ext4_has_feature_sparse_super(sb)) { ret = *min; *min += 1; return ret; @@ -1040,7 +1039,7 @@ exit_free: * do not copy the full number of backups at this time. The resize * which changed s_groups_count will backup again. */ -static void update_backups(struct super_block *sb, int blk_off, char *data, +static void update_backups(struct super_block *sb, sector_t blk_off, char *data, int size, int meta_bg) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1065,7 +1064,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, group = ext4_list_backups(sb, &three, &five, &seven); last = sbi->s_groups_count; } else { - group = ext4_meta_bg_first_group(sb, group) + 1; + group = ext4_get_group_number(sb, blk_off) + 1; last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2); } @@ -1158,7 +1157,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, int i, gdb_off, gdb_num, err = 0; int meta_bg; - meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + meta_bg = ext4_has_feature_meta_bg(sb); for (i = 0; i < count; i++, group++) { int reserved_gdb = ext4_bg_has_super(sb, group) ? le16_to_cpu(es->s_reserved_gdt_blocks) : 0; @@ -1381,9 +1380,7 @@ static void ext4_update_super(struct super_block *sb, ext4_debug("free blocks count %llu", percpu_counter_read(&sbi->s_freeclusters_counter)); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_FLEX_BG) && - sbi->s_log_groups_per_flex) { + if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, group_data[0].group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), @@ -1476,8 +1473,7 @@ exit_journal: int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); - int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_META_BG); + int meta_bg = ext4_has_feature_meta_bg(sb); sector_t old_gdb = 0; update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, @@ -1585,8 +1581,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); - if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + if (gdb_off == 0 && !ext4_has_feature_sparse_super(sb)) { ext4_warning(sb, "Can't resize non-sparse filesystem further"); return -EPERM; } @@ -1604,9 +1599,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) } if (reserved_gdb || gdb_off == 0) { - if (!EXT4_HAS_COMPAT_FEATURE(sb, - EXT4_FEATURE_COMPAT_RESIZE_INODE) - || !le16_to_cpu(es->s_reserved_gdt_blocks)) { + if (ext4_has_feature_resize_inode(sb) || + !le16_to_cpu(es->s_reserved_gdt_blocks)) { ext4_warning(sb, "No reserved GDT blocks, can't resize"); return -EPERM; @@ -1825,8 +1819,8 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) if (err) goto errout; - EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE); - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + ext4_clear_feature_resize_inode(sb); + ext4_set_feature_meta_bg(sb); sbi->s_es->s_first_meta_bg = cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); @@ -1918,9 +1912,9 @@ retry: n_desc_blocks = num_desc_blocks(sb, n_group + 1); o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count); - meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + meta_bg = ext4_has_feature_meta_bg(sb); - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (ext4_has_feature_resize_inode(sb)) { if (meta_bg) { ext4_error(sb, "resize_inode and meta_bg enabled " "simultaneously"); diff --git a/kernel/fs/ext4/super.c b/kernel/fs/ext4/super.c index ff89971e3..c9ab67da6 100644 --- a/kernel/fs/ext4/super.c +++ b/kernel/fs/ext4/super.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> +#include <linux/backing-dev.h> #include <linux/parser.h> #include <linux/buffer_head.h> #include <linux/exportfs.h> @@ -33,7 +34,6 @@ #include <linux/namei.h> #include <linux/quotaops.h> #include <linux/seq_file.h> -#include <linux/proc_fs.h> #include <linux/ctype.h> #include <linux/log2.h> #include <linux/crc16.h> @@ -53,12 +53,10 @@ #define CREATE_TRACE_POINTS #include <trace/events/ext4.h> -static struct proc_dir_entry *ext4_proc_root; -static struct kset *ext4_kset; static struct ext4_lazy_init *ext4_li_info; static struct mutex ext4_li_mtx; -static struct ext4_features *ext4_feat; static int ext4_mballoc_ready; +static struct ratelimit_state ext4_mount_msg_ratelimit; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); @@ -81,9 +79,8 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); -static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t); -#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", @@ -99,7 +96,6 @@ MODULE_ALIAS("ext2"); #endif -#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", @@ -110,15 +106,11 @@ static struct file_system_type ext3_fs_type = { MODULE_ALIAS_FS("ext3"); MODULE_ALIAS("ext3"); #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) -#else -#define IS_EXT3_SB(sb) (0) -#endif static int ext4_verify_csum_type(struct super_block *sb, struct ext4_super_block *es) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_feature_metadata_csum(sb)) return 1; return es->s_checksum_type == EXT4_CRC32C_CHKSUM; @@ -396,9 +388,13 @@ static void ext4_handle_error(struct super_block *sb) smp_wmb(); sb->s_flags |= MS_RDONLY; } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); + } } #define ext4_error_ratelimit(sb) \ @@ -467,7 +463,7 @@ void __ext4_error_file(struct file *file, const char *function, es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_ino = cpu_to_le32(inode->i_ino); if (ext4_error_ratelimit(inode->i_sb)) { - path = d_path(&(file->f_path), pathname, sizeof(pathname)); + path = file_path(file, pathname, sizeof(pathname)); if (IS_ERR(path)) path = "(unknown)"; va_start(args, fmt); @@ -497,6 +493,12 @@ const char *ext4_decode_error(struct super_block *sb, int errno, char *errstr = NULL; switch (errno) { + case -EFSCORRUPTED: + errstr = "Corrupt filesystem"; + break; + case -EFSBADCRC: + errstr = "Filesystem failed CRC"; + break; case -EIO: errstr = "IO failure"; break; @@ -587,8 +589,12 @@ void __ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs panic from previous error\n"); + } } void __ext4_msg(struct super_block *sb, @@ -607,14 +613,17 @@ void __ext4_msg(struct super_block *sb, va_end(args); } +#define ext4_warning_ratelimit(sb) \ + ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \ + "EXT4-fs warning") + void __ext4_warning(struct super_block *sb, const char *function, unsigned int line, const char *fmt, ...) { struct va_format vaf; va_list args; - if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), - "EXT4-fs warning")) + if (!ext4_warning_ratelimit(sb)) return; va_start(args, fmt); @@ -625,6 +634,24 @@ void __ext4_warning(struct super_block *sb, const char *function, va_end(args); } +void __ext4_warning_inode(const struct inode *inode, const char *function, + unsigned int line, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (!ext4_warning_ratelimit(inode->i_sb)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: " + "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id, + function, line, inode->i_ino, current->comm, &vaf); + va_end(args); +} + void __ext4_grp_locked_error(const char *function, unsigned int line, struct super_block *sb, ext4_group_t grp, unsigned long ino, ext4_fsblk_t block, @@ -781,6 +808,7 @@ static void ext4_put_super(struct super_block *sb) ext4_abort(sb, "Couldn't clean up the journal"); } + ext4_unregister_sysfs(sb); ext4_es_unregister_shrinker(sbi); del_timer_sync(&sbi->s_err_report); ext4_release_system_zone(sb); @@ -789,18 +817,12 @@ static void ext4_put_super(struct super_block *sb) ext4_xattr_put_super(sb); if (!(sb->s_flags & MS_RDONLY)) { - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_clear_feature_journal_needs_recovery(sb); es->s_state = cpu_to_le16(sbi->s_mount_state); } if (!(sb->s_flags & MS_RDONLY)) ext4_commit_super(sb, 1); - if (sbi->s_proc) { - remove_proc_entry("options", sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } - kobject_del(&sbi->s_kobj); - for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); kvfree(sbi->s_group_desc); @@ -896,9 +918,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) atomic_set(&ei->i_unwritten, 0); INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); #ifdef CONFIG_EXT4_FS_ENCRYPTION - ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID; + ei->i_crypt_info = NULL; #endif - return &ei->vfs_inode; } @@ -975,6 +996,10 @@ void ext4_clear_inode(struct inode *inode) jbd2_free_inode(EXT4_I(inode)->jinode); EXT4_I(inode)->jinode = NULL; } +#ifdef CONFIG_EXT4_FS_ENCRYPTION + if (EXT4_I(inode)->i_crypt_info) + ext4_free_encryption_info(inode, EXT4_I(inode)->i_crypt_info); +#endif } static struct inode *ext4_nfs_get_inode(struct super_block *sb, @@ -1036,7 +1061,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, return 0; if (journal) return jbd2_journal_try_to_free_buffers(journal, page, - wait & ~__GFP_WAIT); + wait & ~__GFP_DIRECT_RECLAIM); return try_to_free_buffers(page); } @@ -1266,7 +1291,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) "quota options when quota turned on"); return -1; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { + if (ext4_has_feature_quota(sb)) { ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " "when QUOTA feature is enabled"); return -1; @@ -1359,10 +1384,10 @@ static const struct mount_opts { {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, - MOPT_EXT4_ONLY | MOPT_SET}, + MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | EXT4_MOUNT_JOURNAL_CHECKSUM), - MOPT_EXT4_ONLY | MOPT_SET}, + MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET}, {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, @@ -1385,9 +1410,9 @@ static const struct mount_opts { {Opt_stripe, 0, MOPT_GTE0}, {Opt_resuid, 0, MOPT_GTE0}, {Opt_resgid, 0, MOPT_GTE0}, - {Opt_journal_dev, 0, MOPT_GTE0}, - {Opt_journal_path, 0, MOPT_STRING}, - {Opt_journal_ioprio, 0, MOPT_GTE0}, + {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0}, + {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING}, + {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0}, {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, @@ -1491,8 +1516,14 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, return -1; if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) return -1; - if (m->flags & MOPT_EXPLICIT) - set_opt2(sb, EXPLICIT_DELALLOC); + if (m->flags & MOPT_EXPLICIT) { + if (m->mount_opt & EXT4_MOUNT_DELALLOC) { + set_opt2(sb, EXPLICIT_DELALLOC); + } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) { + set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM); + } else + return -1; + } if (m->flags & MOPT_CLEAR_ERR) clear_opt(sb, ERRORS_MASK); if (token == Opt_noquota && sb_any_quota_loaded(sb)) { @@ -1625,8 +1656,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, "quota options when quota turned on"); return -1; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_QUOTA)) { + if (ext4_has_feature_quota(sb)) { ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " "when QUOTA feature is enabled"); @@ -1634,8 +1664,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } sbi->s_jquota_fmt = m->mount_opt; #endif -#ifndef CONFIG_FS_DAX } else if (token == Opt_dax) { +#ifdef CONFIG_FS_DAX + ext4_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + sbi->s_mount_opt |= m->mount_opt; +#else ext4_msg(sb, KERN_INFO, "dax option not supported"); return -1; #endif @@ -1685,7 +1719,7 @@ static int parse_options(char *options, struct super_block *sb, return 0; } #ifdef CONFIG_QUOTA - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && + if (ext4_has_feature_quota(sb) && (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " "feature is enabled"); @@ -1858,7 +1892,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root) return _ext4_show_options(seq, root->d_sb, 0); } -static int options_seq_show(struct seq_file *seq, void *offset) +int ext4_seq_options_show(struct seq_file *seq, void *offset) { struct super_block *sb = seq->private; int rc; @@ -1869,19 +1903,6 @@ static int options_seq_show(struct seq_file *seq, void *offset) return rc; } -static int options_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, options_seq_show, PDE_DATA(inode)); -} - -static const struct file_operations ext4_seq_options_fops = { - .owner = THIS_MODULE, - .open = options_open_fs, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int read_only) { @@ -1922,7 +1943,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, es->s_mtime = cpu_to_le32(get_seconds()); ext4_update_dynamic_rev(sb); if (sbi->s_journal) - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_set_feature_journal_needs_recovery(sb); ext4_commit_super(sb, 1); done: @@ -2005,12 +2026,13 @@ failed: return 0; } -static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, +static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group, struct ext4_group_desc *gdp) { int offset; __u16 crc = 0; __le32 le_group = cpu_to_le32(block_group); + struct ext4_sb_info *sbi = EXT4_SB(sb); if (ext4_has_metadata_csum(sbi->s_sb)) { /* Use new metadata_csum algorithm */ @@ -2030,8 +2052,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, } /* old crc16 code */ - if (!(sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) + if (!ext4_has_feature_gdt_csum(sb)) return 0; offset = offsetof(struct ext4_group_desc, bg_checksum); @@ -2041,8 +2062,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, crc = crc16(crc, (__u8 *)gdp, offset); offset += sizeof(gdp->bg_checksum); /* skip checksum */ /* for checksum of struct ext4_group_desc do the rest...*/ - if ((sbi->s_es->s_feature_incompat & - cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && + if (ext4_has_feature_64bit(sb) && offset < le16_to_cpu(sbi->s_es->s_desc_size)) crc = crc16(crc, (__u8 *)gdp + offset, le16_to_cpu(sbi->s_es->s_desc_size) - @@ -2056,8 +2076,7 @@ int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, struct ext4_group_desc *gdp) { if (ext4_has_group_desc_csum(sb) && - (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), - block_group, gdp))) + (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp))) return 0; return 1; @@ -2068,7 +2087,7 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, { if (!ext4_has_group_desc_csum(sb)) return; - gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); + gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp); } /* Called at mount-time, super-block is locked */ @@ -2084,7 +2103,7 @@ static int ext4_check_descriptors(struct super_block *sb, int flexbg_flag = 0; ext4_group_t i, grp = sbi->s_groups_count; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) + if (ext4_has_feature_flex_bg(sb)) flexbg_flag = 1; ext4_debug("Checking group descriptors"); @@ -2128,7 +2147,7 @@ static int ext4_check_descriptors(struct super_block *sb, if (!ext4_group_desc_csum_verify(sb, i, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Checksum for group %u failed (%u!=%u)", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, + i, le16_to_cpu(ext4_group_desc_csum(sb, i, gdp)), le16_to_cpu(gdp->bg_checksum)); if (!(sb->s_flags & MS_RDONLY)) { ext4_unlock_group(sb, i); @@ -2391,8 +2410,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || - nr < first_meta_bg) + if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg) return logical_sb_block + nr + 1; bg = sbi->s_desc_per_block * nr; if (ext4_bg_has_super(sb, bg)) @@ -2448,335 +2466,6 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) return ret; } -/* sysfs supprt */ - -struct ext4_attr { - struct attribute attr; - ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); - ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, - const char *, size_t); - union { - int offset; - int deprecated_val; - } u; -}; - -static int parse_strtoull(const char *buf, - unsigned long long max, unsigned long long *value) -{ - int ret; - - ret = kstrtoull(skip_spaces(buf), 0, value); - if (!ret && *value > max) - ret = -EINVAL; - return ret; -} - -static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (s64) EXT4_C2B(sbi, - percpu_counter_sum(&sbi->s_dirtyclusters_counter))); -} - -static ssize_t session_write_kbytes_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - struct super_block *sb = sbi->s_buddy_cache->i_sb; - - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); - return snprintf(buf, PAGE_SIZE, "%lu\n", - (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - - sbi->s_sectors_written_start) >> 1); -} - -static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - struct super_block *sb = sbi->s_buddy_cache->i_sb; - - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(sbi->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1))); -} - -static ssize_t inode_readahead_blks_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned long t; - int ret; - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret) - return ret; - - if (t && (!is_power_of_2(t) || t > 0x40000000)) - return -EINVAL; - - sbi->s_inode_readahead_blks = t; - return count; -} - -static ssize_t sbi_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t sbi_ui_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); - unsigned long t; - int ret; - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret) - return ret; - *ui = t; - return count; -} - -static ssize_t es_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - - unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + - a->u.offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t reserved_clusters_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); -} - -static ssize_t reserved_clusters_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned long long val; - int ret; - - if (parse_strtoull(buf, -1ULL, &val)) - return -EINVAL; - ret = ext4_reserve_clusters(sbi, val); - - return ret ? ret : count; -} - -static ssize_t trigger_test_error(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - int len = count; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (len && buf[len-1] == '\n') - len--; - - if (len) - ext4_error(sbi->s_sb, "%.*s", len, buf); - return count; -} - -static ssize_t sbi_deprecated_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); -} - -#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ -static struct ext4_attr ext4_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .u = { \ - .offset = offsetof(struct ext4_sb_info, _elname),\ - }, \ -} - -#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ -static struct ext4_attr ext4_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .u = { \ - .offset = offsetof(struct ext4_super_block, _elname), \ - }, \ -} - -#define EXT4_ATTR(name, mode, show, store) \ -static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) - -#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) -#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) -#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) - -#define EXT4_RO_ATTR_ES_UI(name, elname) \ - EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) -#define EXT4_RW_ATTR_SBI_UI(name, elname) \ - EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) - -#define ATTR_LIST(name) &ext4_attr_##name.attr -#define EXT4_DEPRECATED_ATTR(_name, _val) \ -static struct ext4_attr ext4_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = 0444 }, \ - .show = sbi_deprecated_show, \ - .u = { \ - .deprecated_val = _val, \ - }, \ -} - -EXT4_RO_ATTR(delayed_allocation_blocks); -EXT4_RO_ATTR(session_write_kbytes); -EXT4_RO_ATTR(lifetime_write_kbytes); -EXT4_RW_ATTR(reserved_clusters); -EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, - inode_readahead_blks_store, s_inode_readahead_blks); -EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); -EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); -EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); -EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); -EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); -EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); -EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); -EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); -EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); -EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); -EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); -EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); -EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); -EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); -EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); -EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); -EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); -EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); -EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); - -static struct attribute *ext4_attrs[] = { - ATTR_LIST(delayed_allocation_blocks), - ATTR_LIST(session_write_kbytes), - ATTR_LIST(lifetime_write_kbytes), - ATTR_LIST(reserved_clusters), - ATTR_LIST(inode_readahead_blks), - ATTR_LIST(inode_goal), - ATTR_LIST(mb_stats), - ATTR_LIST(mb_max_to_scan), - ATTR_LIST(mb_min_to_scan), - ATTR_LIST(mb_order2_req), - ATTR_LIST(mb_stream_req), - ATTR_LIST(mb_group_prealloc), - ATTR_LIST(max_writeback_mb_bump), - ATTR_LIST(extent_max_zeroout_kb), - ATTR_LIST(trigger_fs_error), - ATTR_LIST(err_ratelimit_interval_ms), - ATTR_LIST(err_ratelimit_burst), - ATTR_LIST(warning_ratelimit_interval_ms), - ATTR_LIST(warning_ratelimit_burst), - ATTR_LIST(msg_ratelimit_interval_ms), - ATTR_LIST(msg_ratelimit_burst), - ATTR_LIST(errors_count), - ATTR_LIST(first_error_time), - ATTR_LIST(last_error_time), - NULL, -}; - -/* Features this copy of ext4 supports */ -EXT4_INFO_ATTR(lazy_itable_init); -EXT4_INFO_ATTR(batched_discard); -EXT4_INFO_ATTR(meta_bg_resize); -EXT4_INFO_ATTR(encryption); - -static struct attribute *ext4_feat_attrs[] = { - ATTR_LIST(lazy_itable_init), - ATTR_LIST(batched_discard), - ATTR_LIST(meta_bg_resize), - ATTR_LIST(encryption), - NULL, -}; - -static ssize_t ext4_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, - s_kobj); - struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); - - return a->show ? a->show(a, sbi, buf) : 0; -} - -static ssize_t ext4_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, size_t len) -{ - struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, - s_kobj); - struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); - - return a->store ? a->store(a, sbi, buf, len) : 0; -} - -static void ext4_sb_release(struct kobject *kobj) -{ - struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, - s_kobj); - complete(&sbi->s_kobj_unregister); -} - -static const struct sysfs_ops ext4_attr_ops = { - .show = ext4_attr_show, - .store = ext4_attr_store, -}; - -static struct kobj_type ext4_ktype = { - .default_attrs = ext4_attrs, - .sysfs_ops = &ext4_attr_ops, - .release = ext4_sb_release, -}; - -static void ext4_feat_release(struct kobject *kobj) -{ - complete(&ext4_feat->f_kobj_unregister); -} - -static ssize_t ext4_feat_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "supported\n"); -} - -/* - * We can not use ext4_attr_show/store because it relies on the kobject - * being embedded in the ext4_sb_info structure which is definitely not - * true in this case. - */ -static const struct sysfs_ops ext4_feat_ops = { - .show = ext4_feat_show, - .store = NULL, -}; - -static struct kobj_type ext4_feat_ktype = { - .default_attrs = ext4_feat_attrs, - .sysfs_ops = &ext4_feat_ops, - .release = ext4_feat_release, -}; - /* * Check whether this filesystem can be mounted based on * the features present and the RDONLY/RDWR mount requested. @@ -2785,7 +2474,7 @@ static struct kobj_type ext4_feat_ktype = { */ static int ext4_feature_set_ok(struct super_block *sb, int readonly) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { + if (ext4_has_unknown_ext4_incompat_features(sb)) { ext4_msg(sb, KERN_ERR, "Couldn't mount because of " "unsupported optional features (%x)", @@ -2797,14 +2486,14 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) if (readonly) return 1; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) { + if (ext4_has_feature_readonly(sb)) { ext4_msg(sb, KERN_INFO, "filesystem is read-only"); sb->s_flags |= MS_RDONLY; return 1; } /* Check that feature set is OK for a read-write mount */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { + if (ext4_has_unknown_ext4_ro_compat_features(sb)) { ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " "unsupported optional features (%x)", (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & @@ -2815,7 +2504,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) * Large file size enabled file system can only be mounted * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { + if (ext4_has_feature_huge_file(sb)) { if (sizeof(blkcnt_t) < sizeof(u64)) { ext4_msg(sb, KERN_ERR, "Filesystem with huge files " "cannot be mounted RDWR without " @@ -2823,8 +2512,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && - !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) { ext4_msg(sb, KERN_ERR, "Can't support bigalloc feature without " "extents feature\n"); @@ -2832,8 +2520,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) } #ifndef CONFIG_QUOTA - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && - !readonly) { + if (ext4_has_feature_quota(sb) && !readonly) { ext4_msg(sb, KERN_ERR, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); @@ -3290,7 +2977,7 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) + if (!ext4_has_feature_bigalloc(sb)) return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + sbi->s_itb_per_group + 2); @@ -3381,10 +3068,10 @@ int ext4_calculate_overhead(struct super_block *sb) return 0; } - -static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) +static void ext4_set_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; + struct ext4_sb_info *sbi = EXT4_SB(sb); /* * There's no need to reserve anything when we aren't using extents. @@ -3392,8 +3079,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) * hole punching doesn't need new metadata... This is needed especially * to keep ext2/3 backward compatibility. */ - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) - return 0; + if (!ext4_has_feature_extents(sb)) + return; /* * By default we reserve 2% or 4096 clusters, whichever is smaller. * This should cover the situations where we can not afford to run @@ -3402,26 +3089,13 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) * allocation would require 1, or 2 blocks, higher numbers are * very rare. */ - resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> - EXT4_SB(sb)->s_cluster_bits; + resv_clusters = (ext4_blocks_count(sbi->s_es) >> + sbi->s_cluster_bits); do_div(resv_clusters, 50); resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); - return resv_clusters; -} - - -static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count) -{ - ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >> - sbi->s_cluster_bits; - - if (count >= clusters) - return -EINVAL; - - atomic64_set(&sbi->s_resv_clusters, count); - return 0; + atomic64_set(&sbi->s_resv_clusters, resv_clusters); } static int ext4_fill_super(struct super_block *sb, void *data, int silent) @@ -3437,7 +3111,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; - char *cp; const char *descr; int ret = -ENOMEM; int blocksize, clustersize; @@ -3466,15 +3139,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_bdev->bd_part) sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, sectors[1]); -#ifdef CONFIG_EXT4_FS_ENCRYPTION - /* Modes of operations for file and directory encryption. */ - sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS; - sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID; -#endif /* Cleanup superblock name */ - for (cp = sb->s_id; (cp = strchr(cp, '/'));) - *cp = '!'; + strreplace(sb->s_id, '/', '!'); /* -EINVAL is default */ ret = -EINVAL; @@ -3511,9 +3178,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); /* Warn if metadata_csum and gdt_csum are both set. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + if (ext4_has_feature_metadata_csum(sb) && + ext4_has_feature_gdt_csum(sb)) ext4_warning(sb, "metadata_csum and uninit_bg are " "redundant flags; please run fsck."); @@ -3526,8 +3192,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } /* Load the checksum driver */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_feature_metadata_csum(sb)) { sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); @@ -3542,11 +3207,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "invalid superblock checksum. Run e2fsck?"); silent = 1; + ret = -EFSBADCRC; goto cantfind_ext4; } /* Precompute checksum seed for all metadata */ - if (ext4_has_metadata_csum(sb)) + if (ext4_has_feature_csum_seed(sb)) + sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); + else if (ext4_has_metadata_csum(sb)) sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, sizeof(es->s_uuid)); @@ -3641,23 +3309,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (test_opt(sb, DELALLOC)) clear_opt(sb, DELALLOC); + } else { + sb->s_iflags |= SB_I_CGROUPWB; } sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && - (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || - EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || - EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) + (ext4_has_compat_features(sb) || + ext4_has_ro_compat_features(sb) || + ext4_has_incompat_features(sb))) ext4_msg(sb, KERN_WARNING, "feature flags set on rev 0 fs, " "running e2fsck is recommended"); if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { set_opt2(sb, HURD_COMPAT); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_64BIT)) { + if (ext4_has_feature_64bit(sb)) { ext4_msg(sb, KERN_ERR, "The Hurd can't support 64-bit file systems"); goto failed_mount; @@ -3715,8 +3384,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT) && - es->s_encryption_level) { + if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) { ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d", es->s_encryption_level); goto failed_mount; @@ -3748,8 +3416,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } - has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE); + has_huge_files = ext4_has_feature_huge_file(sb); sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, has_huge_files); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); @@ -3773,7 +3440,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } sbi->s_desc_size = le16_to_cpu(es->s_desc_size); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { + if (ext4_has_feature_64bit(sb)) { if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || sbi->s_desc_size > EXT4_MAX_DESC_SIZE || !is_power_of_2(sbi->s_desc_size)) { @@ -3804,7 +3471,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { + if (ext4_has_feature_dir_index(sb)) { i = le32_to_cpu(es->s_flags); if (i & EXT2_FLAGS_UNSIGNED_HASH) sbi->s_hash_unsigned = 3; @@ -3824,8 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Handle clustersize */ clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); - has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC); + has_bigalloc = ext4_has_feature_bigalloc(sb); if (has_bigalloc) { if (clustersize < blocksize) { ext4_msg(sb, KERN_ERR, @@ -3944,13 +3610,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (ext4_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); - - if (sbi->s_proc) - proc_create_data("options", S_IRUGO, sbi->s_proc, - &ext4_seq_options_fops, sb); - bgl_lock_init(sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { @@ -3965,6 +3624,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (!ext4_check_descriptors(sb, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); + ret = -EFSCORRUPTED; goto failed_mount2; } @@ -3990,7 +3650,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) + if (ext4_has_feature_quota(sb)) sb->s_qcop = &dquot_quotactl_sysfile_ops; else sb->s_qcop = &ext4_qctl_operations; @@ -4004,11 +3664,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_root = NULL; needs_recovery = (es->s_last_orphan != 0 || - EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_RECOVER)); + ext4_has_feature_journal_needs_recovery(sb)); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && - !(sb->s_flags & MS_RDONLY)) + if (ext4_has_feature_mmp(sb) && !(sb->s_flags & MS_RDONLY)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) goto failed_mount3a; @@ -4016,23 +3674,47 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * The first inode we look at is the journal inode. Don't try * root first: it may be modified in the journal! */ - if (!test_opt(sb, NOLOAD) && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { + if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { if (ext4_load_journal(sb, es, journal_devnum)) goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && - EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { + ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); goto failed_mount_wq; } else { + /* Nojournal mode, all journal mount options are illegal */ + if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_checksum, fs mounted w/o journal"); + goto failed_mount_wq; + } + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_async_commit, fs mounted w/o journal"); + goto failed_mount_wq; + } + if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "commit=%lu, fs mounted w/o journal", + sbi->s_commit_interval / HZ); + goto failed_mount_wq; + } + if (EXT4_MOUNT_DATA_FLAGS & + (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "data=, fs mounted w/o journal"); + goto failed_mount_wq; + } + sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM; + clear_opt(sb, JOURNAL_CHECKSUM); clear_opt(sb, DATA_FLAGS); sbi->s_journal = NULL; needs_recovery = 0; goto no_journal; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && + if (ext4_has_feature_64bit(sb) && !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); @@ -4084,10 +3766,16 @@ no_journal: } } - if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) && - !(sb->s_flags & MS_RDONLY) && - !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) { - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT); + if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && + (blocksize != PAGE_CACHE_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Unsupported blocksize for fs encryption"); + goto failed_mount_wq; + } + + if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) && + !ext4_has_feature_encrypt(sb)) { + ext4_set_feature_encrypt(sb); ext4_commit_super(sb, 1); } @@ -4146,8 +3834,7 @@ no_journal: if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { sbi->s_want_extra_isize = sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { + if (ext4_has_feature_extra_isize(sb)) { if (sbi->s_want_extra_isize < le16_to_cpu(es->s_want_extra_isize)) sbi->s_want_extra_isize = @@ -4167,12 +3854,7 @@ no_journal: "available"); } - err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); - if (err) { - ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " - "reserved pool", ext4_calculate_resv_clusters(sb)); - goto failed_mount4a; - } + ext4_set_resv_clusters(sb); err = ext4_setup_system_zone(sb); if (err) { @@ -4211,7 +3893,7 @@ no_journal: goto failed_mount6; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) + if (ext4_has_feature_flex_bg(sb)) if (!ext4_fill_flex_info(sb)) { ext4_msg(sb, KERN_ERR, "unable to initialize " @@ -4223,17 +3905,13 @@ no_journal: if (err) goto failed_mount6; - sbi->s_kobj.kset = ext4_kset; - init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, - "%s", sb->s_id); + err = ext4_register_sysfs(sb); if (err) goto failed_mount7; #ifdef CONFIG_QUOTA /* Enable quota usage during mount. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && - !(sb->s_flags & MS_RDONLY)) { + if (ext4_has_feature_quota(sb) && !(sb->s_flags & MS_RDONLY)) { err = ext4_enable_quotas(sb); if (err) goto failed_mount8; @@ -4265,9 +3943,10 @@ no_journal: "the device does not support discard"); } - ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, - *sbi->s_es->s_mount_opts ? "; " : "", orig_data); + if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " + "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, + *sbi->s_es->s_mount_opts ? "; " : "", orig_data); if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -4287,7 +3966,7 @@ cantfind_ext4: #ifdef CONFIG_QUOTA failed_mount8: - kobject_del(&sbi->s_kobj); + ext4_unregister_sysfs(sb); #endif failed_mount7: ext4_unregister_li_request(sb); @@ -4327,10 +4006,6 @@ failed_mount2: failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - if (sbi->s_proc) { - remove_proc_entry("options", sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); @@ -4377,7 +4052,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, struct inode *journal_inode; journal_t *journal; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); /* First, test for the existence of a valid inode on disk. Bad * things happen if we iget() an unused inode, as the subsequent @@ -4427,7 +4102,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, struct ext4_super_block *es; struct block_device *bdev; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); bdev = ext4_blkdev_get(j_dev, sb); if (bdev == NULL) @@ -4519,7 +4194,7 @@ static int ext4_load_journal(struct super_block *sb, int err = 0; int really_read_only; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { @@ -4536,7 +4211,7 @@ static int ext4_load_journal(struct super_block *sb, * crash? For recovery, we need to check in advance whether we * can get read-write access to the device. */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { + if (ext4_has_feature_journal_needs_recovery(sb)) { if (sb->s_flags & MS_RDONLY) { ext4_msg(sb, KERN_INFO, "INFO: recovery " "required on readonly filesystem"); @@ -4567,7 +4242,7 @@ static int ext4_load_journal(struct super_block *sb, if (!(journal->j_flags & JBD2_BARRIER)) ext4_msg(sb, KERN_INFO, "barriers disabled"); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) + if (!ext4_has_feature_journal_needs_recovery(sb)) err = jbd2_journal_wipe(journal, !really_read_only); if (!err) { char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); @@ -4655,7 +4330,8 @@ static int ext4_commit_super(struct super_block *sb, int sync) ext4_superblock_csum_set(sb); mark_buffer_dirty(sbh); if (sync) { - error = sync_dirty_buffer(sbh); + error = __sync_dirty_buffer(sbh, + test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC); if (error) return error; @@ -4680,7 +4356,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb, { journal_t *journal = EXT4_SB(sb)->s_journal; - if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { + if (!ext4_has_feature_journal(sb)) { BUG_ON(journal != NULL); return; } @@ -4688,9 +4364,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, if (jbd2_journal_flush(journal) < 0) goto out; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && + if (ext4_has_feature_journal_needs_recovery(sb) && sb->s_flags & MS_RDONLY) { - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_clear_feature_journal_needs_recovery(sb); ext4_commit_super(sb, 1); } @@ -4710,7 +4386,7 @@ static void ext4_clear_journal_err(struct super_block *sb, int j_errno; const char *errstr; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); journal = EXT4_SB(sb)->s_journal; @@ -4825,7 +4501,7 @@ static int ext4_freeze(struct super_block *sb) goto out; /* Journal blocked and flushed, clear needs_recovery flag. */ - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_clear_feature_journal_needs_recovery(sb); } error = ext4_commit_super(sb, 1); @@ -4847,7 +4523,7 @@ static int ext4_unfreeze(struct super_block *sb) if (EXT4_SB(sb)->s_journal) { /* Reset the needs_recovery flag before the fs is unlocked. */ - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_set_feature_journal_needs_recovery(sb); } ext4_commit_super(sb, 1); @@ -5000,8 +4676,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_mark_recovery_complete(sb, es); } else { /* Make sure we can mount this feature set readwrite */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_READONLY) || + if (ext4_has_feature_readonly(sb) || !ext4_feature_set_ok(sb, 0)) { err = -EROFS; goto restore_opts; @@ -5017,9 +4692,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (!ext4_group_desc_csum_verify(sb, g, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_remount: Checksum for group %u failed (%u!=%u)", - g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), + g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)), le16_to_cpu(gdp->bg_checksum)); - err = -EINVAL; + err = -EFSBADCRC; goto restore_opts; } } @@ -5049,8 +4724,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext4_setup_super(sb, es, 0)) sb->s_flags &= ~MS_RDONLY; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_MMP)) + if (ext4_has_feature_mmp(sb)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) { err = -EROFS; @@ -5083,8 +4757,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (enable_quota) { if (sb_any_quota_suspended(sb)) dquot_resume(sb, -1); - else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_QUOTA)) { + else if (ext4_has_feature_quota(sb)) { err = ext4_enable_quotas(sb); if (err) goto restore_opts; @@ -5228,7 +4901,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot) struct ext4_sb_info *sbi = EXT4_SB(sb); /* Are we journaling quotas? */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || + if (ext4_has_feature_quota(sb) || sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); return ext4_write_dquot(dquot); @@ -5316,7 +4989,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) }; - BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)); + BUG_ON(!ext4_has_feature_quota(sb)); if (!qf_inums[type]) return -EPERM; @@ -5434,6 +5107,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct inode *inode = sb_dqopt(sb)->files[type]; ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); int err, offset = off & (sb->s_blocksize - 1); + int retries = 0; struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -5454,7 +5128,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, return -EIO; } - bh = ext4_bread(handle, inode, blk, 1); + do { + bh = ext4_bread(handle, inode, blk, + EXT4_GET_BLOCKS_CREATE | + EXT4_GET_BLOCKS_METADATA_NOFAIL); + } while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) && + ext4_should_retry_alloc(inode->i_sb, &retries)); if (IS_ERR(bh)) return PTR_ERR(bh); if (!bh) @@ -5488,7 +5167,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); } -#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static inline void register_as_ext2(void) { int err = register_filesystem(&ext2_fs_type); @@ -5504,11 +5183,11 @@ static inline void unregister_as_ext2(void) static inline int ext2_feature_set_ok(struct super_block *sb) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) + if (ext4_has_unknown_ext2_incompat_features(sb)) return 0; if (sb->s_flags & MS_RDONLY) return 1; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) + if (ext4_has_unknown_ext2_ro_compat_features(sb)) return 0; return 1; } @@ -5518,7 +5197,6 @@ static inline void unregister_as_ext2(void) { } static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } #endif -#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static inline void register_as_ext3(void) { int err = register_filesystem(&ext3_fs_type); @@ -5534,21 +5212,16 @@ static inline void unregister_as_ext3(void) static inline int ext3_feature_set_ok(struct super_block *sb) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) + if (ext4_has_unknown_ext3_incompat_features(sb)) return 0; - if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) + if (!ext4_has_feature_journal(sb)) return 0; if (sb->s_flags & MS_RDONLY) return 1; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) + if (ext4_has_unknown_ext3_ro_compat_features(sb)) return 0; return 1; } -#else -static inline void register_as_ext3(void) { } -static inline void unregister_as_ext3(void) { } -static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } -#endif static struct file_system_type ext4_fs_type = { .owner = THIS_MODULE, @@ -5559,37 +5232,6 @@ static struct file_system_type ext4_fs_type = { }; MODULE_ALIAS_FS("ext4"); -static int __init ext4_init_feat_adverts(void) -{ - struct ext4_features *ef; - int ret = -ENOMEM; - - ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); - if (!ef) - goto out; - - ef->f_kobj.kset = ext4_kset; - init_completion(&ef->f_kobj_unregister); - ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, - "features"); - if (ret) { - kfree(ef); - goto out; - } - - ext4_feat = ef; - ret = 0; -out: - return ret; -} - -static void ext4_exit_feat_adverts(void) -{ - kobject_put(&ext4_feat->f_kobj); - wait_for_completion(&ext4_feat->f_kobj_unregister); - kfree(ext4_feat); -} - /* Shared across all ext4 file systems */ wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; @@ -5598,6 +5240,7 @@ static int __init ext4_init_fs(void) { int i, err; + ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64); ext4_li_info = NULL; mutex_init(&ext4_li_mtx); @@ -5615,21 +5258,15 @@ static int __init ext4_init_fs(void) err = ext4_init_pageio(); if (err) - goto out7; + goto out5; err = ext4_init_system_zone(); if (err) - goto out6; - ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); - if (!ext4_kset) { - err = -ENOMEM; - goto out5; - } - ext4_proc_root = proc_mkdir("fs/ext4", NULL); + goto out4; - err = ext4_init_feat_adverts(); + err = ext4_init_sysfs(); if (err) - goto out4; + goto out3; err = ext4_init_mballoc(); if (err) @@ -5654,16 +5291,12 @@ out1: ext4_mballoc_ready = 0; ext4_exit_mballoc(); out2: - ext4_exit_feat_adverts(); -out4: - if (ext4_proc_root) - remove_proc_entry("fs/ext4", NULL); - kset_unregister(ext4_kset); -out5: + ext4_exit_sysfs(); +out3: ext4_exit_system_zone(); -out6: +out4: ext4_exit_pageio(); -out7: +out5: ext4_exit_es(); return err; @@ -5671,15 +5304,14 @@ out7: static void __exit ext4_exit_fs(void) { + ext4_exit_crypto(); ext4_destroy_lazyinit_thread(); unregister_as_ext2(); unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); destroy_inodecache(); ext4_exit_mballoc(); - ext4_exit_feat_adverts(); - remove_proc_entry("fs/ext4", NULL); - kset_unregister(ext4_kset); + ext4_exit_sysfs(); ext4_exit_system_zone(); ext4_exit_pageio(); ext4_exit_es(); diff --git a/kernel/fs/ext4/symlink.c b/kernel/fs/ext4/symlink.c index 187b78920..e8e7af62a 100644 --- a/kernel/fs/ext4/symlink.c +++ b/kernel/fs/ext4/symlink.c @@ -23,34 +23,28 @@ #include "xattr.h" #ifdef CONFIG_EXT4_FS_ENCRYPTION -static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cookie) { struct page *cpage = NULL; char *caddr, *paddr = NULL; struct ext4_str cstr, pstr; struct inode *inode = d_inode(dentry); - struct ext4_fname_crypto_ctx *ctx = NULL; struct ext4_encrypted_symlink_data *sd; loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1); int res; u32 plen, max_size = inode->i_sb->s_blocksize; - if (!ext4_encrypted_inode(inode)) - return page_follow_link_light(dentry, nd); - - ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize); - if (IS_ERR(ctx)) - return ctx; + res = ext4_get_encryption_info(inode); + if (res) + return ERR_PTR(res); if (ext4_inode_is_fast_symlink(inode)) { caddr = (char *) EXT4_I(inode)->i_data; max_size = sizeof(EXT4_I(inode)->i_data); } else { cpage = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(cpage)) { - ext4_put_fname_crypto_ctx(&ctx); - return cpage; - } + if (IS_ERR(cpage)) + return ERR_CAST(cpage); caddr = kmap(cpage); caddr[size] = 0; } @@ -58,12 +52,12 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) /* Symlink is encrypted */ sd = (struct ext4_encrypted_symlink_data *)caddr; cstr.name = sd->encrypted_path; - cstr.len = le32_to_cpu(sd->len); + cstr.len = le16_to_cpu(sd->len); if ((cstr.len + sizeof(struct ext4_encrypted_symlink_data) - 1) > max_size) { /* Symlink data on the disk is corrupted */ - res = -EIO; + res = -EFSCORRUPTED; goto errout; } plen = (cstr.len < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) ? @@ -74,21 +68,19 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) goto errout; } pstr.name = paddr; - res = _ext4_fname_disk_to_usr(ctx, NULL, &cstr, &pstr); + pstr.len = plen; + res = _ext4_fname_disk_to_usr(inode, NULL, &cstr, &pstr); if (res < 0) goto errout; /* Null-terminate the name */ if (res <= plen) paddr[res] = '\0'; - nd_set_link(nd, paddr); - ext4_put_fname_crypto_ctx(&ctx); if (cpage) { kunmap(cpage); page_cache_release(cpage); } - return NULL; + return *cookie = paddr; errout: - ext4_put_fname_crypto_ctx(&ctx); if (cpage) { kunmap(cpage); page_cache_release(cpage); @@ -97,36 +89,22 @@ errout: return ERR_PTR(res); } -static void ext4_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - struct page *page = cookie; - - if (!page) { - kfree(nd_get_link(nd)); - } else { - kunmap(page); - page_cache_release(page); - } -} +const struct inode_operations ext4_encrypted_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = ext4_encrypted_follow_link, + .put_link = kfree_put_link, + .setattr = ext4_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext4_listxattr, + .removexattr = generic_removexattr, +}; #endif -static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd) -{ - struct ext4_inode_info *ei = EXT4_I(d_inode(dentry)); - nd_set_link(nd, (char *) ei->i_data); - return NULL; -} - const struct inode_operations ext4_symlink_inode_operations = { .readlink = generic_readlink, -#ifdef CONFIG_EXT4_FS_ENCRYPTION - .follow_link = ext4_follow_link, - .put_link = ext4_put_link, -#else .follow_link = page_follow_link_light, .put_link = page_put_link, -#endif .setattr = ext4_setattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -136,7 +114,7 @@ const struct inode_operations ext4_symlink_inode_operations = { const struct inode_operations ext4_fast_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = ext4_follow_fast_link, + .follow_link = simple_follow_link, .setattr = ext4_setattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/kernel/fs/ext4/sysfs.c b/kernel/fs/ext4/sysfs.c new file mode 100644 index 000000000..1420a3c61 --- /dev/null +++ b/kernel/fs/ext4/sysfs.c @@ -0,0 +1,448 @@ +/* + * linux/fs/ext4/sysfs.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Theodore Ts'o (tytso@mit.edu) + * + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/proc_fs.h> + +#include "ext4.h" +#include "ext4_jbd2.h" + +typedef enum { + attr_noop, + attr_delayed_allocation_blocks, + attr_session_write_kbytes, + attr_lifetime_write_kbytes, + attr_reserved_clusters, + attr_inode_readahead, + attr_trigger_test_error, + attr_feature, + attr_pointer_ui, + attr_pointer_atomic, +} attr_id_t; + +typedef enum { + ptr_explicit, + ptr_ext4_sb_info_offset, + ptr_ext4_super_block_offset, +} attr_ptr_t; + +static const char *proc_dirname = "fs/ext4"; +static struct proc_dir_entry *ext4_proc_root; + +struct ext4_attr { + struct attribute attr; + short attr_id; + short attr_ptr; + union { + int offset; + void *explicit_ptr; + } u; +}; + +static ssize_t session_write_kbytes_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->s_buddy_cache->i_sb; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + return snprintf(buf, PAGE_SIZE, "%lu\n", + (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + sbi->s_sectors_written_start) >> 1); +} + +static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + struct super_block *sb = sbi->s_buddy_cache->i_sb; + + if (!sb->s_bdev->bd_part) + return snprintf(buf, PAGE_SIZE, "0\n"); + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)(sbi->s_kbytes_written + + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - + EXT4_SB(sb)->s_sectors_written_start) >> 1))); +} + +static ssize_t inode_readahead_blks_store(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned long t; + int ret; + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + + if (t && (!is_power_of_2(t) || t > 0x40000000)) + return -EINVAL; + + sbi->s_inode_readahead_blks = t; + return count; +} + +static ssize_t reserved_clusters_store(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + unsigned long long val; + ext4_fsblk_t clusters = (ext4_blocks_count(sbi->s_es) >> + sbi->s_cluster_bits); + int ret; + + ret = kstrtoull(skip_spaces(buf), 0, &val); + if (!ret || val >= clusters) + return -EINVAL; + + atomic64_set(&sbi->s_resv_clusters, val); + return count; +} + +static ssize_t trigger_test_error(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + int len = count; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (len && buf[len-1] == '\n') + len--; + + if (len) + ext4_error(sbi->s_sb, "%.*s", len, buf); + return count; +} + +#define EXT4_ATTR(_name,_mode,_id) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ +} + +#define EXT4_ATTR_FUNC(_name,_mode) EXT4_ATTR(_name,_mode,_name) + +#define EXT4_ATTR_FEATURE(_name) EXT4_ATTR(_name, 0444, feature) + +#define EXT4_ATTR_OFFSET(_name,_mode,_id,_struct,_elname) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ + .attr_ptr = ptr_##_struct##_offset, \ + .u = { \ + .offset = offsetof(struct _struct, _elname),\ + }, \ +} + +#define EXT4_RO_ATTR_ES_UI(_name,_elname) \ + EXT4_ATTR_OFFSET(_name, 0444, pointer_ui, ext4_super_block, _elname) + +#define EXT4_RW_ATTR_SBI_UI(_name,_elname) \ + EXT4_ATTR_OFFSET(_name, 0644, pointer_ui, ext4_sb_info, _elname) + +#define EXT4_ATTR_PTR(_name,_mode,_id,_ptr) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ + .attr_ptr = ptr_explicit, \ + .u = { \ + .explicit_ptr = _ptr, \ + }, \ +} + +#define ATTR_LIST(name) &ext4_attr_##name.attr + +EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444); +EXT4_ATTR_FUNC(session_write_kbytes, 0444); +EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444); +EXT4_ATTR_FUNC(reserved_clusters, 0644); + +EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, + ext4_sb_info, s_inode_readahead_blks); +EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); +EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); +EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); +EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); +EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); +EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); +EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); +EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); +EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); +EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); +EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); +EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); + +static unsigned int old_bump_val = 128; +EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); + +static struct attribute *ext4_attrs[] = { + ATTR_LIST(delayed_allocation_blocks), + ATTR_LIST(session_write_kbytes), + ATTR_LIST(lifetime_write_kbytes), + ATTR_LIST(reserved_clusters), + ATTR_LIST(inode_readahead_blks), + ATTR_LIST(inode_goal), + ATTR_LIST(mb_stats), + ATTR_LIST(mb_max_to_scan), + ATTR_LIST(mb_min_to_scan), + ATTR_LIST(mb_order2_req), + ATTR_LIST(mb_stream_req), + ATTR_LIST(mb_group_prealloc), + ATTR_LIST(max_writeback_mb_bump), + ATTR_LIST(extent_max_zeroout_kb), + ATTR_LIST(trigger_fs_error), + ATTR_LIST(err_ratelimit_interval_ms), + ATTR_LIST(err_ratelimit_burst), + ATTR_LIST(warning_ratelimit_interval_ms), + ATTR_LIST(warning_ratelimit_burst), + ATTR_LIST(msg_ratelimit_interval_ms), + ATTR_LIST(msg_ratelimit_burst), + ATTR_LIST(errors_count), + ATTR_LIST(first_error_time), + ATTR_LIST(last_error_time), + NULL, +}; + +/* Features this copy of ext4 supports */ +EXT4_ATTR_FEATURE(lazy_itable_init); +EXT4_ATTR_FEATURE(batched_discard); +EXT4_ATTR_FEATURE(meta_bg_resize); +EXT4_ATTR_FEATURE(encryption); +EXT4_ATTR_FEATURE(metadata_csum_seed); + +static struct attribute *ext4_feat_attrs[] = { + ATTR_LIST(lazy_itable_init), + ATTR_LIST(batched_discard), + ATTR_LIST(meta_bg_resize), + ATTR_LIST(encryption), + ATTR_LIST(metadata_csum_seed), + NULL, +}; + +static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) +{ + switch (a->attr_ptr) { + case ptr_explicit: + return a->u.explicit_ptr; + case ptr_ext4_sb_info_offset: + return (void *) (((char *) sbi) + a->u.offset); + case ptr_ext4_super_block_offset: + return (void *) (((char *) sbi->s_es) + a->u.offset); + } + return NULL; +} + +static ssize_t ext4_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, + s_kobj); + struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); + void *ptr = calc_ptr(a, sbi); + + switch (a->attr_id) { + case attr_delayed_allocation_blocks: + return snprintf(buf, PAGE_SIZE, "%llu\n", + (s64) EXT4_C2B(sbi, + percpu_counter_sum(&sbi->s_dirtyclusters_counter))); + case attr_session_write_kbytes: + return session_write_kbytes_show(a, sbi, buf); + case attr_lifetime_write_kbytes: + return lifetime_write_kbytes_show(a, sbi, buf); + case attr_reserved_clusters: + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long) + atomic64_read(&sbi->s_resv_clusters)); + case attr_inode_readahead: + case attr_pointer_ui: + if (!ptr) + return 0; + return snprintf(buf, PAGE_SIZE, "%u\n", + *((unsigned int *) ptr)); + case attr_pointer_atomic: + if (!ptr) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + atomic_read((atomic_t *) ptr)); + case attr_feature: + return snprintf(buf, PAGE_SIZE, "supported\n"); + } + + return 0; +} + +static ssize_t ext4_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, + s_kobj); + struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); + void *ptr = calc_ptr(a, sbi); + unsigned long t; + int ret; + + switch (a->attr_id) { + case attr_reserved_clusters: + return reserved_clusters_store(a, sbi, buf, len); + case attr_pointer_ui: + if (!ptr) + return 0; + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + *((unsigned int *) ptr) = t; + return len; + case attr_inode_readahead: + return inode_readahead_blks_store(a, sbi, buf, len); + case attr_trigger_test_error: + return trigger_test_error(a, sbi, buf, len); + } + return 0; +} + +static void ext4_sb_release(struct kobject *kobj) +{ + struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +static const struct sysfs_ops ext4_attr_ops = { + .show = ext4_attr_show, + .store = ext4_attr_store, +}; + +static struct kobj_type ext4_sb_ktype = { + .default_attrs = ext4_attrs, + .sysfs_ops = &ext4_attr_ops, + .release = ext4_sb_release, +}; + +static struct kobj_type ext4_ktype = { + .sysfs_ops = &ext4_attr_ops, +}; + +static struct kset ext4_kset = { + .kobj = {.ktype = &ext4_ktype}, +}; + +static struct kobj_type ext4_feat_ktype = { + .default_attrs = ext4_feat_attrs, + .sysfs_ops = &ext4_attr_ops, +}; + +static struct kobject ext4_feat = { + .kset = &ext4_kset, +}; + +#define PROC_FILE_SHOW_DEFN(name) \ +static int name##_open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \ +} \ +\ +static const struct file_operations ext4_seq_##name##_fops = { \ + .owner = THIS_MODULE, \ + .open = name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + +#define PROC_FILE_LIST(name) \ + { __stringify(name), &ext4_seq_##name##_fops } + +PROC_FILE_SHOW_DEFN(es_shrinker_info); +PROC_FILE_SHOW_DEFN(options); + +static struct ext4_proc_files { + const char *name; + const struct file_operations *fops; +} proc_files[] = { + PROC_FILE_LIST(options), + PROC_FILE_LIST(es_shrinker_info), + PROC_FILE_LIST(mb_groups), + { NULL, NULL }, +}; + +int ext4_register_sysfs(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_proc_files *p; + int err; + + sbi->s_kobj.kset = &ext4_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL, + "%s", sb->s_id); + if (err) + return err; + + if (ext4_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); + + if (sbi->s_proc) { + for (p = proc_files; p->name; p++) + proc_create_data(p->name, S_IRUGO, sbi->s_proc, + p->fops, sb); + } + return 0; +} + +void ext4_unregister_sysfs(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_proc_files *p; + + if (sbi->s_proc) { + for (p = proc_files; p->name; p++) + remove_proc_entry(p->name, sbi->s_proc); + remove_proc_entry(sb->s_id, ext4_proc_root); + } + kobject_del(&sbi->s_kobj); +} + +int __init ext4_init_sysfs(void) +{ + int ret; + + kobject_set_name(&ext4_kset.kobj, "ext4"); + ext4_kset.kobj.parent = fs_kobj; + ret = kset_register(&ext4_kset); + if (ret) + return ret; + + ret = kobject_init_and_add(&ext4_feat, &ext4_feat_ktype, + NULL, "features"); + if (ret) + kset_unregister(&ext4_kset); + else + ext4_proc_root = proc_mkdir(proc_dirname, NULL); + return ret; +} + +void ext4_exit_sysfs(void) +{ + kobject_put(&ext4_feat); + kset_unregister(&ext4_kset); + remove_proc_entry(proc_dirname, NULL); + ext4_proc_root = NULL; +} + diff --git a/kernel/fs/ext4/xattr.c b/kernel/fs/ext4/xattr.c index 16e28c08d..6b6b3e751 100644 --- a/kernel/fs/ext4/xattr.c +++ b/kernel/fs/ext4/xattr.c @@ -195,7 +195,7 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, while (!IS_LAST_ENTRY(e)) { struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); if ((void *)next >= end) - return -EIO; + return -EFSCORRUPTED; e = next; } @@ -205,7 +205,7 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, (void *)e + sizeof(__u32) || value_start + le16_to_cpu(entry->e_value_offs) + le32_to_cpu(entry->e_value_size) > end)) - return -EIO; + return -EFSCORRUPTED; entry = EXT4_XATTR_NEXT(entry); } @@ -222,9 +222,9 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) - return -EIO; + return -EFSCORRUPTED; if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) - return -EIO; + return -EFSBADCRC; error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, bh->b_data); if (!error) @@ -239,7 +239,7 @@ ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) if (entry->e_value_block != 0 || value_size > size || le16_to_cpu(entry->e_value_offs) + value_size > size) - return -EIO; + return -EFSCORRUPTED; return 0; } @@ -266,7 +266,7 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, } *pentry = entry; if (!cmp && ext4_xattr_check_entry(entry, size)) - return -EIO; + return -EFSCORRUPTED; return cmp ? -ENODATA : 0; } @@ -297,13 +297,13 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, bad_block: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } ext4_xattr_cache_insert(ext4_mb_cache, bh); entry = BFIRST(bh); error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); - if (error == -EIO) + if (error == -EFSCORRUPTED) goto bad_block; if (error) goto cleanup; @@ -405,10 +405,9 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry, ext4_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(dentry, buffer, rest, - entry->e_name, - entry->e_name_len, - handler->flags); + size_t size = handler->list(handler, dentry, buffer, + rest, entry->e_name, + entry->e_name_len); if (buffer) { if (size > rest) return -ERANGE; @@ -445,7 +444,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ext4_xattr_check_block(inode, bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } ext4_xattr_cache_insert(ext4_mb_cache, bh); @@ -525,12 +524,12 @@ errout: static void ext4_xattr_update_super_block(handle_t *handle, struct super_block *sb) { - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR)) + if (ext4_has_feature_xattr(sb)) return; BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { - EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); + ext4_set_feature_xattr(sb); ext4_handle_dirty_super(handle, sb); } } @@ -751,7 +750,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, if (ext4_xattr_check_block(inode, bs->bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } /* Find the named attribute. */ @@ -811,7 +810,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, bs->bh); } unlock_buffer(bs->bh); - if (error == -EIO) + if (error == -EFSCORRUPTED) goto bad_block; if (!error) error = ext4_handle_dirty_xattr_block(handle, @@ -855,7 +854,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } error = ext4_xattr_set_entry(i, s); - if (error == -EIO) + if (error == -EFSCORRUPTED) goto bad_block; if (error) goto cleanup; @@ -1314,7 +1313,7 @@ retry: if (ext4_xattr_check_block(inode, bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); - error = -EIO; + error = -EFSCORRUPTED; goto cleanup; } base = BHDR(bh); @@ -1579,7 +1578,7 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1, memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) return 1; if (entry1->e_value_block != 0 || entry2->e_value_block != 0) - return -EIO; + return -EFSCORRUPTED; if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), (char *)header2 + le16_to_cpu(entry2->e_value_offs), le32_to_cpu(entry1->e_value_size))) diff --git a/kernel/fs/ext4/xattr_security.c b/kernel/fs/ext4/xattr_security.c index 95d90e056..36f4c1a84 100644 --- a/kernel/fs/ext4/xattr_security.c +++ b/kernel/fs/ext4/xattr_security.c @@ -12,8 +12,9 @@ #include "xattr.h" static size_t -ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext4_xattr_security_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; const size_t total_len = prefix_len + name_len + 1; @@ -28,8 +29,9 @@ ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext4_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext4_xattr_security_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -38,8 +40,9 @@ ext4_xattr_security_get(struct dentry *dentry, const char *name, } static int -ext4_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext4_xattr_security_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/kernel/fs/ext4/xattr_trusted.c b/kernel/fs/ext4/xattr_trusted.c index 891ee2ddf..488089053 100644 --- a/kernel/fs/ext4/xattr_trusted.c +++ b/kernel/fs/ext4/xattr_trusted.c @@ -13,8 +13,9 @@ #include "xattr.h" static size_t -ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext4_xattr_trusted_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -31,8 +32,9 @@ ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer, - size_t size, int type) +ext4_xattr_trusted_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, void *buffer, + size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -41,8 +43,9 @@ ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer, } static int -ext4_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext4_xattr_trusted_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/kernel/fs/ext4/xattr_user.c b/kernel/fs/ext4/xattr_user.c index 6ed932b3c..d2dec3364 100644 --- a/kernel/fs/ext4/xattr_user.c +++ b/kernel/fs/ext4/xattr_user.c @@ -12,8 +12,9 @@ #include "xattr.h" static size_t -ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext4_xattr_user_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -30,8 +31,9 @@ ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext4_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext4_xattr_user_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -42,8 +44,9 @@ ext4_xattr_user_get(struct dentry *dentry, const char *name, } static int -ext4_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext4_xattr_user_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; |