From 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Tue, 4 Aug 2015 12:17:53 -0700 Subject: Add the rt linux 4.1.3-rt3 as base Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang --- kernel/fs/cachefiles/Kconfig | 39 ++ kernel/fs/cachefiles/Makefile | 18 + kernel/fs/cachefiles/bind.c | 277 +++++++++++ kernel/fs/cachefiles/daemon.c | 751 ++++++++++++++++++++++++++++++ kernel/fs/cachefiles/interface.c | 557 ++++++++++++++++++++++ kernel/fs/cachefiles/internal.h | 363 +++++++++++++++ kernel/fs/cachefiles/key.c | 159 +++++++ kernel/fs/cachefiles/main.c | 105 +++++ kernel/fs/cachefiles/namei.c | 978 +++++++++++++++++++++++++++++++++++++++ kernel/fs/cachefiles/proc.c | 134 ++++++ kernel/fs/cachefiles/rdwr.c | 967 ++++++++++++++++++++++++++++++++++++++ kernel/fs/cachefiles/security.c | 116 +++++ kernel/fs/cachefiles/xattr.c | 324 +++++++++++++ 13 files changed, 4788 insertions(+) create mode 100644 kernel/fs/cachefiles/Kconfig create mode 100644 kernel/fs/cachefiles/Makefile create mode 100644 kernel/fs/cachefiles/bind.c create mode 100644 kernel/fs/cachefiles/daemon.c create mode 100644 kernel/fs/cachefiles/interface.c create mode 100644 kernel/fs/cachefiles/internal.h create mode 100644 kernel/fs/cachefiles/key.c create mode 100644 kernel/fs/cachefiles/main.c create mode 100644 kernel/fs/cachefiles/namei.c create mode 100644 kernel/fs/cachefiles/proc.c create mode 100644 kernel/fs/cachefiles/rdwr.c create mode 100644 kernel/fs/cachefiles/security.c create mode 100644 kernel/fs/cachefiles/xattr.c (limited to 'kernel/fs/cachefiles') diff --git a/kernel/fs/cachefiles/Kconfig b/kernel/fs/cachefiles/Kconfig new file mode 100644 index 000000000..80e9c6167 --- /dev/null +++ b/kernel/fs/cachefiles/Kconfig @@ -0,0 +1,39 @@ + +config CACHEFILES + tristate "Filesystem caching on files" + depends on FSCACHE && BLOCK + help + This permits use of a mounted filesystem as a cache for other + filesystems - primarily networking filesystems - thus allowing fast + local disk to enhance the speed of slower devices. + + See Documentation/filesystems/caching/cachefiles.txt for more + information. + +config CACHEFILES_DEBUG + bool "Debug CacheFiles" + depends on CACHEFILES + help + This permits debugging to be dynamically enabled in the filesystem + caching on files module. If this is set, the debugging output may be + enabled by setting bits in /sys/modules/cachefiles/parameter/debug or + by including a debugging specifier in /etc/cachefilesd.conf. + +config CACHEFILES_HISTOGRAM + bool "Gather latency information on CacheFiles" + depends on CACHEFILES && PROC_FS + help + + This option causes latency information to be gathered on CacheFiles + operation and exported through file: + + /proc/fs/cachefiles/histogram + + The generation of this histogram adds a certain amount of overhead to + execution as there are a number of points at which data is gathered, + and on a multi-CPU system these may be on cachelines that keep + bouncing between CPUs. On the other hand, the histogram may be + useful for debugging purposes. Saying 'N' here is recommended. + + See Documentation/filesystems/caching/cachefiles.txt for more + information. diff --git a/kernel/fs/cachefiles/Makefile b/kernel/fs/cachefiles/Makefile new file mode 100644 index 000000000..32cbab0ff --- /dev/null +++ b/kernel/fs/cachefiles/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for caching in a mounted filesystem +# + +cachefiles-y := \ + bind.o \ + daemon.o \ + interface.o \ + key.o \ + main.o \ + namei.o \ + rdwr.o \ + security.o \ + xattr.o + +cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o + +obj-$(CONFIG_CACHEFILES) := cachefiles.o diff --git a/kernel/fs/cachefiles/bind.c b/kernel/fs/cachefiles/bind.c new file mode 100644 index 000000000..6af790fc3 --- /dev/null +++ b/kernel/fs/cachefiles/bind.c @@ -0,0 +1,277 @@ +/* Bind and unbind a cache from the filesystem backing it + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static int cachefiles_daemon_add_cache(struct cachefiles_cache *caches); + +/* + * bind a directory as a cache + */ +int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) +{ + _enter("{%u,%u,%u,%u,%u,%u},%s", + cache->frun_percent, + cache->fcull_percent, + cache->fstop_percent, + cache->brun_percent, + cache->bcull_percent, + cache->bstop_percent, + args); + + /* start by checking things over */ + ASSERT(cache->fstop_percent >= 0 && + cache->fstop_percent < cache->fcull_percent && + cache->fcull_percent < cache->frun_percent && + cache->frun_percent < 100); + + ASSERT(cache->bstop_percent >= 0 && + cache->bstop_percent < cache->bcull_percent && + cache->bcull_percent < cache->brun_percent && + cache->brun_percent < 100); + + if (*args) { + pr_err("'bind' command doesn't take an argument\n"); + return -EINVAL; + } + + if (!cache->rootdirname) { + pr_err("No cache directory specified\n"); + return -EINVAL; + } + + /* don't permit already bound caches to be re-bound */ + if (test_bit(CACHEFILES_READY, &cache->flags)) { + pr_err("Cache already bound\n"); + return -EBUSY; + } + + /* make sure we have copies of the tag and dirname strings */ + if (!cache->tag) { + /* the tag string is released by the fops->release() + * function, so we don't release it on error here */ + cache->tag = kstrdup("CacheFiles", GFP_KERNEL); + if (!cache->tag) + return -ENOMEM; + } + + /* add the cache */ + return cachefiles_daemon_add_cache(cache); +} + +/* + * add a cache + */ +static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) +{ + struct cachefiles_object *fsdef; + struct path path; + struct kstatfs stats; + struct dentry *graveyard, *cachedir, *root; + const struct cred *saved_cred; + int ret; + + _enter(""); + + /* we want to work under the module's security ID */ + ret = cachefiles_get_security_ID(cache); + if (ret < 0) + return ret; + + cachefiles_begin_secure(cache, &saved_cred); + + /* allocate the root index object */ + ret = -ENOMEM; + + fsdef = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); + if (!fsdef) + goto error_root_object; + + ASSERTCMP(fsdef->backer, ==, NULL); + + atomic_set(&fsdef->usage, 1); + fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; + + _debug("- fsdef %p", fsdef); + + /* look up the directory at the root of the cache */ + ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path); + if (ret < 0) + goto error_open_root; + + cache->mnt = path.mnt; + root = path.dentry; + + /* check parameters */ + ret = -EOPNOTSUPP; + if (d_is_negative(root) || + !d_backing_inode(root)->i_op->lookup || + !d_backing_inode(root)->i_op->mkdir || + !d_backing_inode(root)->i_op->setxattr || + !d_backing_inode(root)->i_op->getxattr || + !root->d_sb->s_op->statfs || + !root->d_sb->s_op->sync_fs) + goto error_unsupported; + + ret = -EROFS; + if (root->d_sb->s_flags & MS_RDONLY) + goto error_unsupported; + + /* determine the security of the on-disk cache as this governs + * security ID of files we create */ + ret = cachefiles_determine_cache_security(cache, root, &saved_cred); + if (ret < 0) + goto error_unsupported; + + /* get the cache size and blocksize */ + ret = vfs_statfs(&path, &stats); + if (ret < 0) + goto error_unsupported; + + ret = -ERANGE; + if (stats.f_bsize <= 0) + goto error_unsupported; + + ret = -EOPNOTSUPP; + if (stats.f_bsize > PAGE_SIZE) + goto error_unsupported; + + cache->bsize = stats.f_bsize; + cache->bshift = 0; + if (stats.f_bsize < PAGE_SIZE) + cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize); + + _debug("blksize %u (shift %u)", + cache->bsize, cache->bshift); + + _debug("size %llu, avail %llu", + (unsigned long long) stats.f_blocks, + (unsigned long long) stats.f_bavail); + + /* set up caching limits */ + do_div(stats.f_files, 100); + cache->fstop = stats.f_files * cache->fstop_percent; + cache->fcull = stats.f_files * cache->fcull_percent; + cache->frun = stats.f_files * cache->frun_percent; + + _debug("limits {%llu,%llu,%llu} files", + (unsigned long long) cache->frun, + (unsigned long long) cache->fcull, + (unsigned long long) cache->fstop); + + stats.f_blocks >>= cache->bshift; + do_div(stats.f_blocks, 100); + cache->bstop = stats.f_blocks * cache->bstop_percent; + cache->bcull = stats.f_blocks * cache->bcull_percent; + cache->brun = stats.f_blocks * cache->brun_percent; + + _debug("limits {%llu,%llu,%llu} blocks", + (unsigned long long) cache->brun, + (unsigned long long) cache->bcull, + (unsigned long long) cache->bstop); + + /* get the cache directory and check its type */ + cachedir = cachefiles_get_directory(cache, root, "cache"); + if (IS_ERR(cachedir)) { + ret = PTR_ERR(cachedir); + goto error_unsupported; + } + + fsdef->dentry = cachedir; + fsdef->fscache.cookie = NULL; + + ret = cachefiles_check_object_type(fsdef); + if (ret < 0) + goto error_unsupported; + + /* get the graveyard directory */ + graveyard = cachefiles_get_directory(cache, root, "graveyard"); + if (IS_ERR(graveyard)) { + ret = PTR_ERR(graveyard); + goto error_unsupported; + } + + cache->graveyard = graveyard; + + /* publish the cache */ + fscache_init_cache(&cache->cache, + &cachefiles_cache_ops, + "%s", + fsdef->dentry->d_sb->s_id); + + fscache_object_init(&fsdef->fscache, NULL, &cache->cache); + + ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); + if (ret < 0) + goto error_add_cache; + + /* done */ + set_bit(CACHEFILES_READY, &cache->flags); + dput(root); + + pr_info("File cache on %s registered\n", cache->cache.identifier); + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0, 0); + cachefiles_end_secure(cache, saved_cred); + return 0; + +error_add_cache: + dput(cache->graveyard); + cache->graveyard = NULL; +error_unsupported: + mntput(cache->mnt); + cache->mnt = NULL; + dput(fsdef->dentry); + fsdef->dentry = NULL; + dput(root); +error_open_root: + kmem_cache_free(cachefiles_object_jar, fsdef); +error_root_object: + cachefiles_end_secure(cache, saved_cred); + pr_err("Failed to register: %d\n", ret); + return ret; +} + +/* + * unbind a cache on fd release + */ +void cachefiles_daemon_unbind(struct cachefiles_cache *cache) +{ + _enter(""); + + if (test_bit(CACHEFILES_READY, &cache->flags)) { + pr_info("File cache on %s unregistering\n", + cache->cache.identifier); + + fscache_withdraw_cache(&cache->cache); + } + + dput(cache->graveyard); + mntput(cache->mnt); + + kfree(cache->rootdirname); + kfree(cache->secctx); + kfree(cache->tag); + + _leave(""); +} diff --git a/kernel/fs/cachefiles/daemon.c b/kernel/fs/cachefiles/daemon.c new file mode 100644 index 000000000..f601def05 --- /dev/null +++ b/kernel/fs/cachefiles/daemon.c @@ -0,0 +1,751 @@ +/* Daemon interface + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static int cachefiles_daemon_open(struct inode *, struct file *); +static int cachefiles_daemon_release(struct inode *, struct file *); +static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t, + loff_t *); +static ssize_t cachefiles_daemon_write(struct file *, const char __user *, + size_t, loff_t *); +static unsigned int cachefiles_daemon_poll(struct file *, + struct poll_table_struct *); +static int cachefiles_daemon_frun(struct cachefiles_cache *, char *); +static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *); +static int cachefiles_daemon_fstop(struct cachefiles_cache *, char *); +static int cachefiles_daemon_brun(struct cachefiles_cache *, char *); +static int cachefiles_daemon_bcull(struct cachefiles_cache *, char *); +static int cachefiles_daemon_bstop(struct cachefiles_cache *, char *); +static int cachefiles_daemon_cull(struct cachefiles_cache *, char *); +static int cachefiles_daemon_debug(struct cachefiles_cache *, char *); +static int cachefiles_daemon_dir(struct cachefiles_cache *, char *); +static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *); +static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *); +static int cachefiles_daemon_tag(struct cachefiles_cache *, char *); + +static unsigned long cachefiles_open; + +const struct file_operations cachefiles_daemon_fops = { + .owner = THIS_MODULE, + .open = cachefiles_daemon_open, + .release = cachefiles_daemon_release, + .read = cachefiles_daemon_read, + .write = cachefiles_daemon_write, + .poll = cachefiles_daemon_poll, + .llseek = noop_llseek, +}; + +struct cachefiles_daemon_cmd { + char name[8]; + int (*handler)(struct cachefiles_cache *cache, char *args); +}; + +static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { + { "bind", cachefiles_daemon_bind }, + { "brun", cachefiles_daemon_brun }, + { "bcull", cachefiles_daemon_bcull }, + { "bstop", cachefiles_daemon_bstop }, + { "cull", cachefiles_daemon_cull }, + { "debug", cachefiles_daemon_debug }, + { "dir", cachefiles_daemon_dir }, + { "frun", cachefiles_daemon_frun }, + { "fcull", cachefiles_daemon_fcull }, + { "fstop", cachefiles_daemon_fstop }, + { "inuse", cachefiles_daemon_inuse }, + { "secctx", cachefiles_daemon_secctx }, + { "tag", cachefiles_daemon_tag }, + { "", NULL } +}; + + +/* + * do various checks + */ +static int cachefiles_daemon_open(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache; + + _enter(""); + + /* only the superuser may do this */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* the cachefiles device may only be open once at a time */ + if (xchg(&cachefiles_open, 1) == 1) + return -EBUSY; + + /* allocate a cache record */ + cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL); + if (!cache) { + cachefiles_open = 0; + return -ENOMEM; + } + + mutex_init(&cache->daemon_mutex); + cache->active_nodes = RB_ROOT; + rwlock_init(&cache->active_lock); + init_waitqueue_head(&cache->daemon_pollwq); + + /* set default caching limits + * - limit at 1% free space and/or free files + * - cull below 5% free space and/or free files + * - cease culling above 7% free space and/or free files + */ + cache->frun_percent = 7; + cache->fcull_percent = 5; + cache->fstop_percent = 1; + cache->brun_percent = 7; + cache->bcull_percent = 5; + cache->bstop_percent = 1; + + file->private_data = cache; + cache->cachefilesd = file; + return 0; +} + +/* + * release a cache + */ +static int cachefiles_daemon_release(struct inode *inode, struct file *file) +{ + struct cachefiles_cache *cache = file->private_data; + + _enter(""); + + ASSERT(cache); + + set_bit(CACHEFILES_DEAD, &cache->flags); + + cachefiles_daemon_unbind(cache); + + ASSERT(!cache->active_nodes.rb_node); + + /* clean up the control file interface */ + cache->cachefilesd = NULL; + file->private_data = NULL; + cachefiles_open = 0; + + kfree(cache); + + _leave(""); + return 0; +} + +/* + * read the cache state + */ +static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, + size_t buflen, loff_t *pos) +{ + struct cachefiles_cache *cache = file->private_data; + char buffer[256]; + int n; + + //_enter(",,%zu,", buflen); + + if (!test_bit(CACHEFILES_READY, &cache->flags)) + return 0; + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0, 0); + + /* summarise */ + clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags); + + n = snprintf(buffer, sizeof(buffer), + "cull=%c" + " frun=%llx" + " fcull=%llx" + " fstop=%llx" + " brun=%llx" + " bcull=%llx" + " bstop=%llx", + test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0', + (unsigned long long) cache->frun, + (unsigned long long) cache->fcull, + (unsigned long long) cache->fstop, + (unsigned long long) cache->brun, + (unsigned long long) cache->bcull, + (unsigned long long) cache->bstop + ); + + if (n > buflen) + return -EMSGSIZE; + + if (copy_to_user(_buffer, buffer, n) != 0) + return -EFAULT; + + return n; +} + +/* + * command the cache + */ +static ssize_t cachefiles_daemon_write(struct file *file, + const char __user *_data, + size_t datalen, + loff_t *pos) +{ + const struct cachefiles_daemon_cmd *cmd; + struct cachefiles_cache *cache = file->private_data; + ssize_t ret; + char *data, *args, *cp; + + //_enter(",,%zu,", datalen); + + ASSERT(cache); + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) + return -EIO; + + if (datalen < 0 || datalen > PAGE_SIZE - 1) + return -EOPNOTSUPP; + + /* drag the command string into the kernel so we can parse it */ + data = kmalloc(datalen + 1, GFP_KERNEL); + if (!data) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(data, _data, datalen) != 0) + goto error; + + data[datalen] = '\0'; + + ret = -EINVAL; + if (memchr(data, '\0', datalen)) + goto error; + + /* strip any newline */ + cp = memchr(data, '\n', datalen); + if (cp) { + if (cp == data) + goto error; + + *cp = '\0'; + } + + /* parse the command */ + ret = -EOPNOTSUPP; + + for (args = data; *args; args++) + if (isspace(*args)) + break; + if (*args) { + if (args == data) + goto error; + *args = '\0'; + args = skip_spaces(++args); + } + + /* run the appropriate command handler */ + for (cmd = cachefiles_daemon_cmds; cmd->name[0]; cmd++) + if (strcmp(cmd->name, data) == 0) + goto found_command; + +error: + kfree(data); + //_leave(" = %zd", ret); + return ret; + +found_command: + mutex_lock(&cache->daemon_mutex); + + ret = -EIO; + if (!test_bit(CACHEFILES_DEAD, &cache->flags)) + ret = cmd->handler(cache, args); + + mutex_unlock(&cache->daemon_mutex); + + if (ret == 0) + ret = datalen; + goto error; +} + +/* + * poll for culling state + * - use POLLOUT to indicate culling state + */ +static unsigned int cachefiles_daemon_poll(struct file *file, + struct poll_table_struct *poll) +{ + struct cachefiles_cache *cache = file->private_data; + unsigned int mask; + + poll_wait(file, &cache->daemon_pollwq, poll); + mask = 0; + + if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) + mask |= POLLIN; + + if (test_bit(CACHEFILES_CULLING, &cache->flags)) + mask |= POLLOUT; + + return mask; +} + +/* + * give a range error for cache space constraints + * - can be tail-called + */ +static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, + char *args) +{ + pr_err("Free space limits must be in range 0%%<=stop%" + */ +static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args) +{ + unsigned long frun; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + frun = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (frun <= cache->fcull_percent || frun >= 100) + return cachefiles_daemon_range_error(cache, args); + + cache->frun_percent = frun; + return 0; +} + +/* + * set the percentage of files at which to start culling + * - command: "fcull %" + */ +static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args) +{ + unsigned long fcull; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + fcull = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (fcull <= cache->fstop_percent || fcull >= cache->frun_percent) + return cachefiles_daemon_range_error(cache, args); + + cache->fcull_percent = fcull; + return 0; +} + +/* + * set the percentage of files at which to stop allocating + * - command: "fstop %" + */ +static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) +{ + unsigned long fstop; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + fstop = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (fstop < 0 || fstop >= cache->fcull_percent) + return cachefiles_daemon_range_error(cache, args); + + cache->fstop_percent = fstop; + return 0; +} + +/* + * set the percentage of blocks at which to stop culling + * - command: "brun %" + */ +static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args) +{ + unsigned long brun; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + brun = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (brun <= cache->bcull_percent || brun >= 100) + return cachefiles_daemon_range_error(cache, args); + + cache->brun_percent = brun; + return 0; +} + +/* + * set the percentage of blocks at which to start culling + * - command: "bcull %" + */ +static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args) +{ + unsigned long bcull; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + bcull = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent) + return cachefiles_daemon_range_error(cache, args); + + cache->bcull_percent = bcull; + return 0; +} + +/* + * set the percentage of blocks at which to stop allocating + * - command: "bstop %" + */ +static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) +{ + unsigned long bstop; + + _enter(",%s", args); + + if (!*args) + return -EINVAL; + + bstop = simple_strtoul(args, &args, 10); + if (args[0] != '%' || args[1] != '\0') + return -EINVAL; + + if (bstop < 0 || bstop >= cache->bcull_percent) + return cachefiles_daemon_range_error(cache, args); + + cache->bstop_percent = bstop; + return 0; +} + +/* + * set the cache directory + * - command: "dir " + */ +static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) +{ + char *dir; + + _enter(",%s", args); + + if (!*args) { + pr_err("Empty directory specified\n"); + return -EINVAL; + } + + if (cache->rootdirname) { + pr_err("Second cache directory specified\n"); + return -EEXIST; + } + + dir = kstrdup(args, GFP_KERNEL); + if (!dir) + return -ENOMEM; + + cache->rootdirname = dir; + return 0; +} + +/* + * set the cache security context + * - command: "secctx " + */ +static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) +{ + char *secctx; + + _enter(",%s", args); + + if (!*args) { + pr_err("Empty security context specified\n"); + return -EINVAL; + } + + if (cache->secctx) { + pr_err("Second security context specified\n"); + return -EINVAL; + } + + secctx = kstrdup(args, GFP_KERNEL); + if (!secctx) + return -ENOMEM; + + cache->secctx = secctx; + return 0; +} + +/* + * set the cache tag + * - command: "tag " + */ +static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) +{ + char *tag; + + _enter(",%s", args); + + if (!*args) { + pr_err("Empty tag specified\n"); + return -EINVAL; + } + + if (cache->tag) + return -EEXIST; + + tag = kstrdup(args, GFP_KERNEL); + if (!tag) + return -ENOMEM; + + cache->tag = tag; + return 0; +} + +/* + * request a node in the cache be culled from the current working directory + * - command: "cull " + */ +static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) +{ + struct path path; + const struct cred *saved_cred; + int ret; + + _enter(",%s", args); + + if (strchr(args, '/')) + goto inval; + + if (!test_bit(CACHEFILES_READY, &cache->flags)) { + pr_err("cull applied to unready cache\n"); + return -EIO; + } + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + pr_err("cull applied to dead cache\n"); + return -EIO; + } + + /* extract the directory dentry from the cwd */ + get_fs_pwd(current->fs, &path); + + if (!d_can_lookup(path.dentry)) + goto notdir; + + cachefiles_begin_secure(cache, &saved_cred); + ret = cachefiles_cull(cache, path.dentry, args); + cachefiles_end_secure(cache, saved_cred); + + path_put(&path); + _leave(" = %d", ret); + return ret; + +notdir: + path_put(&path); + pr_err("cull command requires dirfd to be a directory\n"); + return -ENOTDIR; + +inval: + pr_err("cull command requires dirfd and filename\n"); + return -EINVAL; +} + +/* + * set debugging mode + * - command: "debug " + */ +static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) +{ + unsigned long mask; + + _enter(",%s", args); + + mask = simple_strtoul(args, &args, 0); + if (args[0] != '\0') + goto inval; + + cachefiles_debug = mask; + _leave(" = 0"); + return 0; + +inval: + pr_err("debug command requires mask\n"); + return -EINVAL; +} + +/* + * find out whether an object in the current working directory is in use or not + * - command: "inuse " + */ +static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) +{ + struct path path; + const struct cred *saved_cred; + int ret; + + //_enter(",%s", args); + + if (strchr(args, '/')) + goto inval; + + if (!test_bit(CACHEFILES_READY, &cache->flags)) { + pr_err("inuse applied to unready cache\n"); + return -EIO; + } + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + pr_err("inuse applied to dead cache\n"); + return -EIO; + } + + /* extract the directory dentry from the cwd */ + get_fs_pwd(current->fs, &path); + + if (!d_can_lookup(path.dentry)) + goto notdir; + + cachefiles_begin_secure(cache, &saved_cred); + ret = cachefiles_check_in_use(cache, path.dentry, args); + cachefiles_end_secure(cache, saved_cred); + + path_put(&path); + //_leave(" = %d", ret); + return ret; + +notdir: + path_put(&path); + pr_err("inuse command requires dirfd to be a directory\n"); + return -ENOTDIR; + +inval: + pr_err("inuse command requires dirfd and filename\n"); + return -EINVAL; +} + +/* + * see if we have space for a number of pages and/or a number of files in the + * cache + */ +int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr) +{ + struct kstatfs stats; + struct path path = { + .mnt = cache->mnt, + .dentry = cache->mnt->mnt_root, + }; + int ret; + + //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", + // (unsigned long long) cache->frun, + // (unsigned long long) cache->fcull, + // (unsigned long long) cache->fstop, + // (unsigned long long) cache->brun, + // (unsigned long long) cache->bcull, + // (unsigned long long) cache->bstop, + // fnr, bnr); + + /* find out how many pages of blockdev are available */ + memset(&stats, 0, sizeof(stats)); + + ret = vfs_statfs(&path, &stats); + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error(cache, "statfs failed"); + _leave(" = %d", ret); + return ret; + } + + stats.f_bavail >>= cache->bshift; + + //_debug("avail %llu,%llu", + // (unsigned long long) stats.f_ffree, + // (unsigned long long) stats.f_bavail); + + /* see if there is sufficient space */ + if (stats.f_ffree > fnr) + stats.f_ffree -= fnr; + else + stats.f_ffree = 0; + + if (stats.f_bavail > bnr) + stats.f_bavail -= bnr; + else + stats.f_bavail = 0; + + ret = -ENOBUFS; + if (stats.f_ffree < cache->fstop || + stats.f_bavail < cache->bstop) + goto begin_cull; + + ret = 0; + if (stats.f_ffree < cache->fcull || + stats.f_bavail < cache->bcull) + goto begin_cull; + + if (test_bit(CACHEFILES_CULLING, &cache->flags) && + stats.f_ffree >= cache->frun && + stats.f_bavail >= cache->brun && + test_and_clear_bit(CACHEFILES_CULLING, &cache->flags) + ) { + _debug("cease culling"); + cachefiles_state_changed(cache); + } + + //_leave(" = 0"); + return 0; + +begin_cull: + if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { + _debug("### CULL CACHE ###"); + cachefiles_state_changed(cache); + } + + _leave(" = %d", ret); + return ret; +} diff --git a/kernel/fs/cachefiles/interface.c b/kernel/fs/cachefiles/interface.c new file mode 100644 index 000000000..afa023dde --- /dev/null +++ b/kernel/fs/cachefiles/interface.c @@ -0,0 +1,557 @@ +/* FS-Cache interface to CacheFiles + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include "internal.h" + +struct cachefiles_lookup_data { + struct cachefiles_xattr *auxdata; /* auxiliary data */ + char *key; /* key path */ +}; + +static int cachefiles_attr_changed(struct fscache_object *_object); + +/* + * allocate an object record for a cookie lookup and prepare the lookup data + */ +static struct fscache_object *cachefiles_alloc_object( + struct fscache_cache *_cache, + struct fscache_cookie *cookie) +{ + struct cachefiles_lookup_data *lookup_data; + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct cachefiles_xattr *auxdata; + unsigned keylen, auxlen; + void *buffer; + char *key; + + cache = container_of(_cache, struct cachefiles_cache, cache); + + _enter("{%s},%p,", cache->cache.identifier, cookie); + + lookup_data = kmalloc(sizeof(*lookup_data), cachefiles_gfp); + if (!lookup_data) + goto nomem_lookup_data; + + /* create a new object record and a temporary leaf image */ + object = kmem_cache_alloc(cachefiles_object_jar, cachefiles_gfp); + if (!object) + goto nomem_object; + + ASSERTCMP(object->backer, ==, NULL); + + BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); + atomic_set(&object->usage, 1); + + fscache_object_init(&object->fscache, cookie, &cache->cache); + + object->type = cookie->def->type; + + /* get hold of the raw key + * - stick the length on the front and leave space on the back for the + * encoder + */ + buffer = kmalloc((2 + 512) + 3, cachefiles_gfp); + if (!buffer) + goto nomem_buffer; + + keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512); + ASSERTCMP(keylen, <, 512); + + *(uint16_t *)buffer = keylen; + ((char *)buffer)[keylen + 2] = 0; + ((char *)buffer)[keylen + 3] = 0; + ((char *)buffer)[keylen + 4] = 0; + + /* turn the raw key into something that can work with as a filename */ + key = cachefiles_cook_key(buffer, keylen + 2, object->type); + if (!key) + goto nomem_key; + + /* get hold of the auxiliary data and prepend the object type */ + auxdata = buffer; + auxlen = 0; + if (cookie->def->get_aux) { + auxlen = cookie->def->get_aux(cookie->netfs_data, + auxdata->data, 511); + ASSERTCMP(auxlen, <, 511); + } + + auxdata->len = auxlen + 1; + auxdata->type = cookie->def->type; + + lookup_data->auxdata = auxdata; + lookup_data->key = key; + object->lookup_data = lookup_data; + + _leave(" = %p [%p]", &object->fscache, lookup_data); + return &object->fscache; + +nomem_key: + kfree(buffer); +nomem_buffer: + BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); + kmem_cache_free(cachefiles_object_jar, object); + fscache_object_destroyed(&cache->cache); +nomem_object: + kfree(lookup_data); +nomem_lookup_data: + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); +} + +/* + * attempt to look up the nominated node in this cache + * - return -ETIMEDOUT to be scheduled again + */ +static int cachefiles_lookup_object(struct fscache_object *_object) +{ + struct cachefiles_lookup_data *lookup_data; + struct cachefiles_object *parent, *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + int ret; + + _enter("{OBJ%x}", _object->debug_id); + + cache = container_of(_object->cache, struct cachefiles_cache, cache); + parent = container_of(_object->parent, + struct cachefiles_object, fscache); + object = container_of(_object, struct cachefiles_object, fscache); + lookup_data = object->lookup_data; + + ASSERTCMP(lookup_data, !=, NULL); + + /* look up the key, creating any missing bits */ + cachefiles_begin_secure(cache, &saved_cred); + ret = cachefiles_walk_to_object(parent, object, + lookup_data->key, + lookup_data->auxdata); + cachefiles_end_secure(cache, saved_cred); + + /* polish off by setting the attributes of non-index files */ + if (ret == 0 && + object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) + cachefiles_attr_changed(&object->fscache); + + if (ret < 0 && ret != -ETIMEDOUT) { + if (ret != -ENOBUFS) + pr_warn("Lookup failed error %d\n", ret); + fscache_object_lookup_error(&object->fscache); + } + + _leave(" [%d]", ret); + return ret; +} + +/* + * indication of lookup completion + */ +static void cachefiles_lookup_complete(struct fscache_object *_object) +{ + struct cachefiles_object *object; + + object = container_of(_object, struct cachefiles_object, fscache); + + _enter("{OBJ%x,%p}", object->fscache.debug_id, object->lookup_data); + + if (object->lookup_data) { + kfree(object->lookup_data->key); + kfree(object->lookup_data->auxdata); + kfree(object->lookup_data); + object->lookup_data = NULL; + } +} + +/* + * increment the usage count on an inode object (may fail if unmounting) + */ +static +struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) +{ + struct cachefiles_object *object = + container_of(_object, struct cachefiles_object, fscache); + + _enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage)); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); +#endif + + atomic_inc(&object->usage); + return &object->fscache; +} + +/* + * update the auxiliary data for an object object on disk + */ +static void cachefiles_update_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_xattr *auxdata; + struct cachefiles_cache *cache; + struct fscache_cookie *cookie; + const struct cred *saved_cred; + unsigned auxlen; + + _enter("{OBJ%x}", _object->debug_id); + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, struct cachefiles_cache, + cache); + + if (!fscache_use_cookie(_object)) { + _leave(" [relinq]"); + return; + } + + cookie = object->fscache.cookie; + + if (!cookie->def->get_aux) { + fscache_unuse_cookie(_object); + _leave(" [no aux]"); + return; + } + + auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp); + if (!auxdata) { + fscache_unuse_cookie(_object); + _leave(" [nomem]"); + return; + } + + auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); + fscache_unuse_cookie(_object); + ASSERTCMP(auxlen, <, 511); + + auxdata->len = auxlen + 1; + auxdata->type = cookie->def->type; + + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_update_object_xattr(object, auxdata); + cachefiles_end_secure(cache, saved_cred); + kfree(auxdata); + _leave(""); +} + +/* + * discard the resources pinned by an object and effect retirement if + * requested + */ +static void cachefiles_drop_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + + ASSERT(_object); + + object = container_of(_object, struct cachefiles_object, fscache); + + _enter("{OBJ%x,%d}", + object->fscache.debug_id, atomic_read(&object->usage)); + + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); +#endif + + /* We need to tidy the object up if we did in fact manage to open it. + * It's possible for us to get here before the object is fully + * initialised if the parent goes away or the object gets retired + * before we set it up. + */ + if (object->dentry) { + /* delete retired objects */ + if (test_bit(FSCACHE_OBJECT_RETIRED, &object->fscache.flags) && + _object != cache->cache.fsdef + ) { + _debug("- retire object OBJ%x", object->fscache.debug_id); + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_delete_object(cache, object); + cachefiles_end_secure(cache, saved_cred); + } + + /* close the filesystem stuff attached to the object */ + if (object->backer != object->dentry) + dput(object->backer); + object->backer = NULL; + } + + /* note that the object is now inactive */ + if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { + write_lock(&cache->active_lock); + if (!test_and_clear_bit(CACHEFILES_OBJECT_ACTIVE, + &object->flags)) + BUG(); + rb_erase(&object->active_node, &cache->active_nodes); + wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); + write_unlock(&cache->active_lock); + } + + dput(object->dentry); + object->dentry = NULL; + + _leave(""); +} + +/* + * dispose of a reference to an object + */ +static void cachefiles_put_object(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct fscache_cache *cache; + + ASSERT(_object); + + object = container_of(_object, struct cachefiles_object, fscache); + + _enter("{OBJ%x,%d}", + object->fscache.debug_id, atomic_read(&object->usage)); + +#ifdef CACHEFILES_DEBUG_SLAB + ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); +#endif + + ASSERTIFCMP(object->fscache.parent, + object->fscache.parent->n_children, >, 0); + + if (atomic_dec_and_test(&object->usage)) { + _debug("- kill object OBJ%x", object->fscache.debug_id); + + ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); + ASSERTCMP(object->fscache.parent, ==, NULL); + ASSERTCMP(object->backer, ==, NULL); + ASSERTCMP(object->dentry, ==, NULL); + ASSERTCMP(object->fscache.n_ops, ==, 0); + ASSERTCMP(object->fscache.n_children, ==, 0); + + if (object->lookup_data) { + kfree(object->lookup_data->key); + kfree(object->lookup_data->auxdata); + kfree(object->lookup_data); + object->lookup_data = NULL; + } + + cache = object->fscache.cache; + fscache_object_destroy(&object->fscache); + kmem_cache_free(cachefiles_object_jar, object); + fscache_object_destroyed(cache); + } + + _leave(""); +} + +/* + * sync a cache + */ +static void cachefiles_sync_cache(struct fscache_cache *_cache) +{ + struct cachefiles_cache *cache; + const struct cred *saved_cred; + int ret; + + _enter("%p", _cache); + + cache = container_of(_cache, struct cachefiles_cache, cache); + + /* make sure all pages pinned by operations on behalf of the netfs are + * written to disc */ + cachefiles_begin_secure(cache, &saved_cred); + down_read(&cache->mnt->mnt_sb->s_umount); + ret = sync_filesystem(cache->mnt->mnt_sb); + up_read(&cache->mnt->mnt_sb->s_umount); + cachefiles_end_secure(cache, saved_cred); + + if (ret == -EIO) + cachefiles_io_error(cache, + "Attempt to sync backing fs superblock" + " returned error %d", + ret); +} + +/* + * check if the backing cache is updated to FS-Cache + * - called by FS-Cache when evaluates if need to invalidate the cache + */ +static bool cachefiles_check_consistency(struct fscache_operation *op) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + int ret; + + _enter("{OBJ%x}", op->object->debug_id); + + object = container_of(op->object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + cachefiles_begin_secure(cache, &saved_cred); + ret = cachefiles_check_auxdata(object); + cachefiles_end_secure(cache, saved_cred); + + _leave(" = %d", ret); + return ret; +} + +/* + * notification the attributes on an object have changed + * - called with reads/writes excluded by FS-Cache + */ +static int cachefiles_attr_changed(struct fscache_object *_object) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + struct iattr newattrs; + uint64_t ni_size; + loff_t oi_size; + int ret; + + _object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size); + + _enter("{OBJ%x},[%llu]", + _object->debug_id, (unsigned long long) ni_size); + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + if (ni_size == object->i_size) + return 0; + + if (!object->backer) + return -ENOBUFS; + + ASSERT(d_is_reg(object->backer)); + + fscache_set_store_limit(&object->fscache, ni_size); + + oi_size = i_size_read(d_backing_inode(object->backer)); + if (oi_size == ni_size) + return 0; + + cachefiles_begin_secure(cache, &saved_cred); + mutex_lock(&d_inode(object->backer)->i_mutex); + + /* if there's an extension to a partial page at the end of the backing + * file, we need to discard the partial page so that we pick up new + * data after it */ + if (oi_size & ~PAGE_MASK && ni_size > oi_size) { + _debug("discard tail %llx", oi_size); + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = oi_size & PAGE_MASK; + ret = notify_change(object->backer, &newattrs, NULL); + if (ret < 0) + goto truncate_failed; + } + + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = ni_size; + ret = notify_change(object->backer, &newattrs, NULL); + +truncate_failed: + mutex_unlock(&d_inode(object->backer)->i_mutex); + cachefiles_end_secure(cache, saved_cred); + + if (ret == -EIO) { + fscache_set_store_limit(&object->fscache, 0); + cachefiles_io_error_obj(object, "Size set failed"); + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Invalidate an object + */ +static void cachefiles_invalidate_object(struct fscache_operation *op) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + struct path path; + uint64_t ni_size; + int ret; + + object = container_of(op->object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + op->object->cookie->def->get_attr(op->object->cookie->netfs_data, + &ni_size); + + _enter("{OBJ%x},[%llu]", + op->object->debug_id, (unsigned long long)ni_size); + + if (object->backer) { + ASSERT(d_is_reg(object->backer)); + + fscache_set_store_limit(&object->fscache, ni_size); + + path.dentry = object->backer; + path.mnt = cache->mnt; + + cachefiles_begin_secure(cache, &saved_cred); + ret = vfs_truncate(&path, 0); + if (ret == 0) + ret = vfs_truncate(&path, ni_size); + cachefiles_end_secure(cache, saved_cred); + + if (ret != 0) { + fscache_set_store_limit(&object->fscache, 0); + if (ret == -EIO) + cachefiles_io_error_obj(object, + "Invalidate failed"); + } + } + + fscache_op_complete(op, true); + _leave(""); +} + +/* + * dissociate a cache from all the pages it was backing + */ +static void cachefiles_dissociate_pages(struct fscache_cache *cache) +{ + _enter(""); +} + +const struct fscache_cache_ops cachefiles_cache_ops = { + .name = "cachefiles", + .alloc_object = cachefiles_alloc_object, + .lookup_object = cachefiles_lookup_object, + .lookup_complete = cachefiles_lookup_complete, + .grab_object = cachefiles_grab_object, + .update_object = cachefiles_update_object, + .invalidate_object = cachefiles_invalidate_object, + .drop_object = cachefiles_drop_object, + .put_object = cachefiles_put_object, + .sync_cache = cachefiles_sync_cache, + .attr_changed = cachefiles_attr_changed, + .read_or_alloc_page = cachefiles_read_or_alloc_page, + .read_or_alloc_pages = cachefiles_read_or_alloc_pages, + .allocate_page = cachefiles_allocate_page, + .allocate_pages = cachefiles_allocate_pages, + .write_page = cachefiles_write_page, + .uncache_page = cachefiles_uncache_page, + .dissociate_pages = cachefiles_dissociate_pages, + .check_consistency = cachefiles_check_consistency, +}; diff --git a/kernel/fs/cachefiles/internal.h b/kernel/fs/cachefiles/internal.h new file mode 100644 index 000000000..8c52472d2 --- /dev/null +++ b/kernel/fs/cachefiles/internal.h @@ -0,0 +1,363 @@ +/* General netfs cache on cache files internal defs + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) "CacheFiles: " fmt + + +#include +#include +#include +#include +#include + +struct cachefiles_cache; +struct cachefiles_object; + +extern unsigned cachefiles_debug; +#define CACHEFILES_DEBUG_KENTER 1 +#define CACHEFILES_DEBUG_KLEAVE 2 +#define CACHEFILES_DEBUG_KDEBUG 4 + +#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) + +/* + * node records + */ +struct cachefiles_object { + struct fscache_object fscache; /* fscache handle */ + struct cachefiles_lookup_data *lookup_data; /* cached lookup data */ + struct dentry *dentry; /* the file/dir representing this object */ + struct dentry *backer; /* backing file */ + loff_t i_size; /* object size */ + unsigned long flags; +#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ +#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */ + atomic_t usage; /* object usage count */ + uint8_t type; /* object type */ + uint8_t new; /* T if object new */ + spinlock_t work_lock; + struct rb_node active_node; /* link in active tree (dentry is key) */ +}; + +extern struct kmem_cache *cachefiles_object_jar; + +/* + * Cache files cache definition + */ +struct cachefiles_cache { + struct fscache_cache cache; /* FS-Cache record */ + struct vfsmount *mnt; /* mountpoint holding the cache */ + struct dentry *graveyard; /* directory into which dead objects go */ + struct file *cachefilesd; /* manager daemon handle */ + const struct cred *cache_cred; /* security override for accessing cache */ + struct mutex daemon_mutex; /* command serialisation mutex */ + wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon */ + struct rb_root active_nodes; /* active nodes (can't be culled) */ + rwlock_t active_lock; /* lock for active_nodes */ + atomic_t gravecounter; /* graveyard uniquifier */ + unsigned frun_percent; /* when to stop culling (% files) */ + unsigned fcull_percent; /* when to start culling (% files) */ + unsigned fstop_percent; /* when to stop allocating (% files) */ + unsigned brun_percent; /* when to stop culling (% blocks) */ + unsigned bcull_percent; /* when to start culling (% blocks) */ + unsigned bstop_percent; /* when to stop allocating (% blocks) */ + unsigned bsize; /* cache's block size */ + unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */ + uint64_t frun; /* when to stop culling */ + uint64_t fcull; /* when to start culling */ + uint64_t fstop; /* when to stop allocating */ + sector_t brun; /* when to stop culling */ + sector_t bcull; /* when to start culling */ + sector_t bstop; /* when to stop allocating */ + unsigned long flags; +#define CACHEFILES_READY 0 /* T if cache prepared */ +#define CACHEFILES_DEAD 1 /* T if cache dead */ +#define CACHEFILES_CULLING 2 /* T if cull engaged */ +#define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */ + char *rootdirname; /* name of cache root directory */ + char *secctx; /* LSM security context */ + char *tag; /* cache binding tag */ +}; + +/* + * backing file read tracking + */ +struct cachefiles_one_read { + wait_queue_t monitor; /* link into monitored waitqueue */ + struct page *back_page; /* backing file page we're waiting for */ + struct page *netfs_page; /* netfs page we're going to fill */ + struct fscache_retrieval *op; /* retrieval op covering this */ + struct list_head op_link; /* link in op's todo list */ +}; + +/* + * backing file write tracking + */ +struct cachefiles_one_write { + struct page *netfs_page; /* netfs page to copy */ + struct cachefiles_object *object; + struct list_head obj_link; /* link in object's lists */ + fscache_rw_complete_t end_io_func; + void *context; +}; + +/* + * auxiliary data xattr buffer + */ +struct cachefiles_xattr { + uint16_t len; + uint8_t type; + uint8_t data[]; +}; + +/* + * note change of state for daemon + */ +static inline void cachefiles_state_changed(struct cachefiles_cache *cache) +{ + set_bit(CACHEFILES_STATE_CHANGED, &cache->flags); + wake_up_all(&cache->daemon_pollwq); +} + +/* + * bind.c + */ +extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); +extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); + +/* + * daemon.c + */ +extern const struct file_operations cachefiles_daemon_fops; + +extern int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr); + +/* + * interface.c + */ +extern const struct fscache_cache_ops cachefiles_cache_ops; + +/* + * key.c + */ +extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); + +/* + * namei.c + */ +extern int cachefiles_delete_object(struct cachefiles_cache *cache, + struct cachefiles_object *object); +extern int cachefiles_walk_to_object(struct cachefiles_object *parent, + struct cachefiles_object *object, + const char *key, + struct cachefiles_xattr *auxdata); +extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *name); + +extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename); + +extern int cachefiles_check_in_use(struct cachefiles_cache *cache, + struct dentry *dir, char *filename); + +/* + * proc.c + */ +#ifdef CONFIG_CACHEFILES_HISTOGRAM +extern atomic_t cachefiles_lookup_histogram[HZ]; +extern atomic_t cachefiles_mkdir_histogram[HZ]; +extern atomic_t cachefiles_create_histogram[HZ]; + +extern int __init cachefiles_proc_init(void); +extern void cachefiles_proc_cleanup(void); +static inline +void cachefiles_hist(atomic_t histogram[], unsigned long start_jif) +{ + unsigned long jif = jiffies - start_jif; + if (jif >= HZ) + jif = HZ - 1; + atomic_inc(&histogram[jif]); +} + +#else +#define cachefiles_proc_init() (0) +#define cachefiles_proc_cleanup() do {} while (0) +#define cachefiles_hist(hist, start_jif) do {} while (0) +#endif + +/* + * rdwr.c + */ +extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, + struct page *, gfp_t); +extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *, + struct list_head *, unsigned *, + gfp_t); +extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *, + gfp_t); +extern int cachefiles_allocate_pages(struct fscache_retrieval *, + struct list_head *, unsigned *, gfp_t); +extern int cachefiles_write_page(struct fscache_storage *, struct page *); +extern void cachefiles_uncache_page(struct fscache_object *, struct page *); + +/* + * security.c + */ +extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); +extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, + struct dentry *root, + const struct cred **_saved_cred); + +static inline void cachefiles_begin_secure(struct cachefiles_cache *cache, + const struct cred **_saved_cred) +{ + *_saved_cred = override_creds(cache->cache_cred); +} + +static inline void cachefiles_end_secure(struct cachefiles_cache *cache, + const struct cred *saved_cred) +{ + revert_creds(saved_cred); +} + +/* + * xattr.c + */ +extern int cachefiles_check_object_type(struct cachefiles_object *object); +extern int cachefiles_set_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_update_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_check_auxdata(struct cachefiles_object *object); +extern int cachefiles_check_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata); +extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct dentry *dentry); + + +/* + * error handling + */ + +#define cachefiles_io_error(___cache, FMT, ...) \ +do { \ + pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ + fscache_io_error(&(___cache)->cache); \ + set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ +} while (0) + +#define cachefiles_io_error_obj(object, FMT, ...) \ +do { \ + struct cachefiles_cache *___cache; \ + \ + ___cache = container_of((object)->fscache.cache, \ + struct cachefiles_cache, cache); \ + cachefiles_io_error(___cache, FMT, ##__VA_ARGS__); \ +} while (0) + + +/* + * debug tracing + */ +#define dbgprintk(FMT, ...) \ + printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) + +#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) + + +#if defined(__KDEBUG) +#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) +#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) +#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) + +#elif defined(CONFIG_CACHEFILES_DEBUG) +#define _enter(FMT, ...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KENTER) \ + kenter(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT, ...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE) \ + kleave(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT, ...) \ +do { \ + if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG) \ + kdebug(FMT, ##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) +#endif + +#if 1 /* defined(__KDEBUGALL) */ + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + if (unlikely(!((X) OP (Y)))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + if (unlikely((C) && !((X) OP (Y)))) { \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ + (unsigned long)(X), (unsigned long)(Y)); \ + BUG(); \ + } \ +} while (0) + +#else + +#define ASSERT(X) do {} while (0) +#define ASSERTCMP(X, OP, Y) do {} while (0) +#define ASSERTIF(C, X) do {} while (0) +#define ASSERTIFCMP(C, X, OP, Y) do {} while (0) + +#endif diff --git a/kernel/fs/cachefiles/key.c b/kernel/fs/cachefiles/key.c new file mode 100644 index 000000000..33b58c60f --- /dev/null +++ b/kernel/fs/cachefiles/key.c @@ -0,0 +1,159 @@ +/* Key to pathname encoder + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include "internal.h" + +static const char cachefiles_charmap[64] = + "0123456789" /* 0 - 9 */ + "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ + "_-" /* 62 - 63 */ + ; + +static const char cachefiles_filecharmap[256] = { + /* we skip space and tab and control chars */ + [33 ... 46] = 1, /* '!' -> '.' */ + /* we skip '/' as it's significant to pathwalk */ + [48 ... 127] = 1, /* '0' -> '~' */ +}; + +/* + * turn the raw key into something cooked + * - the raw key should include the length in the two bytes at the front + * - the key may be up to 514 bytes in length (including the length word) + * - "base64" encode the strange keys, mapping 3 bytes of raw to four of + * cooked + * - need to cut the cooked key into 252 char lengths (189 raw bytes) + */ +char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type) +{ + unsigned char csum, ch; + unsigned int acc; + char *key; + int loop, len, max, seg, mark, print; + + _enter(",%d", keylen); + + BUG_ON(keylen < 2 || keylen > 514); + + csum = raw[0] + raw[1]; + print = 1; + for (loop = 2; loop < keylen; loop++) { + ch = raw[loop]; + csum += ch; + print &= cachefiles_filecharmap[ch]; + } + + if (print) { + /* if the path is usable ASCII, then we render it directly */ + max = keylen - 2; + max += 2; /* two base64'd length chars on the front */ + max += 5; /* @checksum/M */ + max += 3 * 2; /* maximum number of segment dividers (".../M") + * is ((514 + 251) / 252) = 3 + */ + max += 1; /* NUL on end */ + } else { + /* calculate the maximum length of the cooked key */ + keylen = (keylen + 2) / 3; + + max = keylen * 4; + max += 5; /* @checksum/M */ + max += 3 * 2; /* maximum number of segment dividers (".../M") + * is ((514 + 188) / 189) = 3 + */ + max += 1; /* NUL on end */ + } + + max += 1; /* 2nd NUL on end */ + + _debug("max: %d", max); + + key = kmalloc(max, cachefiles_gfp); + if (!key) + return NULL; + + len = 0; + + /* build the cooked key */ + sprintf(key, "@%02x%c+", (unsigned) csum, 0); + len = 5; + mark = len - 1; + + if (print) { + acc = *(uint16_t *) raw; + raw += 2; + + key[len + 1] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len] = cachefiles_charmap[acc & 63]; + len += 2; + + seg = 250; + for (loop = keylen; loop > 0; loop--) { + if (seg <= 0) { + key[len++] = '\0'; + mark = len; + key[len++] = '+'; + seg = 252; + } + + key[len++] = *raw++; + ASSERT(len < max); + } + + switch (type) { + case FSCACHE_COOKIE_TYPE_INDEX: type = 'I'; break; + case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break; + default: type = 'S'; break; + } + } else { + seg = 252; + for (loop = keylen; loop > 0; loop--) { + if (seg <= 0) { + key[len++] = '\0'; + mark = len; + key[len++] = '+'; + seg = 252; + } + + acc = *raw++; + acc |= *raw++ << 8; + acc |= *raw++ << 16; + + _debug("acc: %06x", acc); + + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + key[len++] = cachefiles_charmap[acc & 63]; + + ASSERT(len < max); + } + + switch (type) { + case FSCACHE_COOKIE_TYPE_INDEX: type = 'J'; break; + case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break; + default: type = 'T'; break; + } + } + + key[mark] = type; + key[len++] = 0; + key[len] = 0; + + _leave(" = %p %d", key, len); + return key; +} diff --git a/kernel/fs/cachefiles/main.c b/kernel/fs/cachefiles/main.c new file mode 100644 index 000000000..711f13d8c --- /dev/null +++ b/kernel/fs/cachefiles/main.c @@ -0,0 +1,105 @@ +/* Network filesystem caching backend to use cache files on a premounted + * filesystem + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +unsigned cachefiles_debug; +module_param_named(debug, cachefiles_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(cachefiles_debug, "CacheFiles debugging mask"); + +MODULE_DESCRIPTION("Mounted-filesystem based cache"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +struct kmem_cache *cachefiles_object_jar; + +static struct miscdevice cachefiles_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "cachefiles", + .fops = &cachefiles_daemon_fops, +}; + +static void cachefiles_object_init_once(void *_object) +{ + struct cachefiles_object *object = _object; + + memset(object, 0, sizeof(*object)); + spin_lock_init(&object->work_lock); +} + +/* + * initialise the fs caching module + */ +static int __init cachefiles_init(void) +{ + int ret; + + ret = misc_register(&cachefiles_dev); + if (ret < 0) + goto error_dev; + + /* create an object jar */ + ret = -ENOMEM; + cachefiles_object_jar = + kmem_cache_create("cachefiles_object_jar", + sizeof(struct cachefiles_object), + 0, + SLAB_HWCACHE_ALIGN, + cachefiles_object_init_once); + if (!cachefiles_object_jar) { + pr_notice("Failed to allocate an object jar\n"); + goto error_object_jar; + } + + ret = cachefiles_proc_init(); + if (ret < 0) + goto error_proc; + + pr_info("Loaded\n"); + return 0; + +error_proc: + kmem_cache_destroy(cachefiles_object_jar); +error_object_jar: + misc_deregister(&cachefiles_dev); +error_dev: + pr_err("failed to register: %d\n", ret); + return ret; +} + +fs_initcall(cachefiles_init); + +/* + * clean up on module removal + */ +static void __exit cachefiles_exit(void) +{ + pr_info("Unloading\n"); + + cachefiles_proc_cleanup(); + kmem_cache_destroy(cachefiles_object_jar); + misc_deregister(&cachefiles_dev); +} + +module_exit(cachefiles_exit); diff --git a/kernel/fs/cachefiles/namei.c b/kernel/fs/cachefiles/namei.c new file mode 100644 index 000000000..ab857ab9f --- /dev/null +++ b/kernel/fs/cachefiles/namei.c @@ -0,0 +1,978 @@ +/* CacheFiles path walking and related routines + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +#define CACHEFILES_KEYBUF_SIZE 512 + +/* + * dump debugging info about an object + */ +static noinline +void __cachefiles_printk_object(struct cachefiles_object *object, + const char *prefix, + u8 *keybuf) +{ + struct fscache_cookie *cookie; + unsigned keylen, loop; + + pr_err("%sobject: OBJ%x\n", prefix, object->fscache.debug_id); + pr_err("%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", + prefix, object->fscache.state->name, + object->fscache.flags, work_busy(&object->fscache.work), + object->fscache.events, object->fscache.event_mask); + pr_err("%sops=%u inp=%u exc=%u\n", + prefix, object->fscache.n_ops, object->fscache.n_in_progress, + object->fscache.n_exclusive); + pr_err("%sparent=%p\n", + prefix, object->fscache.parent); + + spin_lock(&object->fscache.lock); + cookie = object->fscache.cookie; + if (cookie) { + pr_err("%scookie=%p [pr=%p nd=%p fl=%lx]\n", + prefix, + object->fscache.cookie, + object->fscache.cookie->parent, + object->fscache.cookie->netfs_data, + object->fscache.cookie->flags); + if (keybuf && cookie->def) + keylen = cookie->def->get_key(cookie->netfs_data, keybuf, + CACHEFILES_KEYBUF_SIZE); + else + keylen = 0; + } else { + pr_err("%scookie=NULL\n", prefix); + keylen = 0; + } + spin_unlock(&object->fscache.lock); + + if (keylen) { + pr_err("%skey=[%u] '", prefix, keylen); + for (loop = 0; loop < keylen; loop++) + pr_cont("%02x", keybuf[loop]); + pr_cont("'\n"); + } +} + +/* + * dump debugging info about a pair of objects + */ +static noinline void cachefiles_printk_object(struct cachefiles_object *object, + struct cachefiles_object *xobject) +{ + u8 *keybuf; + + keybuf = kmalloc(CACHEFILES_KEYBUF_SIZE, GFP_NOIO); + if (object) + __cachefiles_printk_object(object, "", keybuf); + if (xobject) + __cachefiles_printk_object(xobject, "x", keybuf); + kfree(keybuf); +} + +/* + * mark the owner of a dentry, if there is one, to indicate that that dentry + * has been preemptively deleted + * - the caller must hold the i_mutex on the dentry's parent as required to + * call vfs_unlink(), vfs_rmdir() or vfs_rename() + */ +static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, + struct dentry *dentry) +{ + struct cachefiles_object *object; + struct rb_node *p; + + _enter(",'%pd'", dentry); + + write_lock(&cache->active_lock); + + p = cache->active_nodes.rb_node; + while (p) { + object = rb_entry(p, struct cachefiles_object, active_node); + if (object->dentry > dentry) + p = p->rb_left; + else if (object->dentry < dentry) + p = p->rb_right; + else + goto found_dentry; + } + + write_unlock(&cache->active_lock); + _leave(" [no owner]"); + return; + + /* found the dentry for */ +found_dentry: + kdebug("preemptive burial: OBJ%x [%s] %p", + object->fscache.debug_id, + object->fscache.state->name, + dentry); + + if (fscache_object_is_live(&object->fscache)) { + pr_err("\n"); + pr_err("Error: Can't preemptively bury live object\n"); + cachefiles_printk_object(object, NULL); + } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { + pr_err("Error: Object already preemptively buried\n"); + } + + write_unlock(&cache->active_lock); + _leave(" [owner marked]"); +} + +/* + * record the fact that an object is now active + */ +static int cachefiles_mark_object_active(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct cachefiles_object *xobject; + struct rb_node **_p, *_parent = NULL; + struct dentry *dentry; + + _enter(",%p", object); + +try_again: + write_lock(&cache->active_lock); + + if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { + pr_err("Error: Object already active\n"); + cachefiles_printk_object(object, NULL); + BUG(); + } + + dentry = object->dentry; + _p = &cache->active_nodes.rb_node; + while (*_p) { + _parent = *_p; + xobject = rb_entry(_parent, + struct cachefiles_object, active_node); + + ASSERT(xobject != object); + + if (xobject->dentry > dentry) + _p = &(*_p)->rb_left; + else if (xobject->dentry < dentry) + _p = &(*_p)->rb_right; + else + goto wait_for_old_object; + } + + rb_link_node(&object->active_node, _parent, _p); + rb_insert_color(&object->active_node, &cache->active_nodes); + + write_unlock(&cache->active_lock); + _leave(" = 0"); + return 0; + + /* an old object from a previous incarnation is hogging the slot - we + * need to wait for it to be destroyed */ +wait_for_old_object: + if (fscache_object_is_live(&xobject->fscache)) { + pr_err("\n"); + pr_err("Error: Unexpected object collision\n"); + cachefiles_printk_object(object, xobject); + BUG(); + } + atomic_inc(&xobject->usage); + write_unlock(&cache->active_lock); + + if (test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { + wait_queue_head_t *wq; + + signed long timeout = 60 * HZ; + wait_queue_t wait; + bool requeue; + + /* if the object we're waiting for is queued for processing, + * then just put ourselves on the queue behind it */ + if (work_pending(&xobject->fscache.work)) { + _debug("queue OBJ%x behind OBJ%x immediately", + object->fscache.debug_id, + xobject->fscache.debug_id); + goto requeue; + } + + /* otherwise we sleep until either the object we're waiting for + * is done, or the fscache_object is congested */ + wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); + init_wait(&wait); + requeue = false; + do { + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) + break; + + requeue = fscache_object_sleep_till_congested(&timeout); + } while (timeout > 0 && !requeue); + finish_wait(wq, &wait); + + if (requeue && + test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { + _debug("queue OBJ%x behind OBJ%x after wait", + object->fscache.debug_id, + xobject->fscache.debug_id); + goto requeue; + } + + if (timeout <= 0) { + pr_err("\n"); + pr_err("Error: Overlong wait for old active object to go away\n"); + cachefiles_printk_object(object, xobject); + goto requeue; + } + } + + ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)); + + cache->cache.ops->put_object(&xobject->fscache); + goto try_again; + +requeue: + clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); + cache->cache.ops->put_object(&xobject->fscache); + _leave(" = -ETIMEDOUT"); + return -ETIMEDOUT; +} + +/* + * delete an object representation from the cache + * - file backed objects are unlinked + * - directory backed objects are stuffed into the graveyard for userspace to + * delete + * - unlocks the directory mutex + */ +static int cachefiles_bury_object(struct cachefiles_cache *cache, + struct dentry *dir, + struct dentry *rep, + bool preemptive) +{ + struct dentry *grave, *trap; + struct path path, path_to_graveyard; + char nbuffer[8 + 8 + 1]; + int ret; + + _enter(",'%pd','%pd'", dir, rep); + + _debug("remove %p from %p", rep, dir); + + /* non-directories can just be unlinked */ + if (!d_is_dir(rep)) { + _debug("unlink stale object"); + + path.mnt = cache->mnt; + path.dentry = dir; + ret = security_path_unlink(&path, rep); + if (ret < 0) { + cachefiles_io_error(cache, "Unlink security error"); + } else { + ret = vfs_unlink(d_inode(dir), rep, NULL); + + if (preemptive) + cachefiles_mark_object_buried(cache, rep); + } + + mutex_unlock(&d_inode(dir)->i_mutex); + + if (ret == -EIO) + cachefiles_io_error(cache, "Unlink failed"); + + _leave(" = %d", ret); + return ret; + } + + /* directories have to be moved to the graveyard */ + _debug("move stale object to graveyard"); + mutex_unlock(&d_inode(dir)->i_mutex); + +try_again: + /* first step is to make up a grave dentry in the graveyard */ + sprintf(nbuffer, "%08x%08x", + (uint32_t) get_seconds(), + (uint32_t) atomic_inc_return(&cache->gravecounter)); + + /* do the multiway lock magic */ + trap = lock_rename(cache->graveyard, dir); + + /* do some checks before getting the grave dentry */ + if (rep->d_parent != dir) { + /* the entry was probably culled when we dropped the parent dir + * lock */ + unlock_rename(cache->graveyard, dir); + _leave(" = 0 [culled?]"); + return 0; + } + + if (!d_can_lookup(cache->graveyard)) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Graveyard no longer a directory"); + return -EIO; + } + + if (trap == rep) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + if (d_mountpoint(rep)) { + unlock_rename(cache->graveyard, dir); + cachefiles_io_error(cache, "Mountpoint in cache"); + return -EIO; + } + + grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer)); + if (IS_ERR(grave)) { + unlock_rename(cache->graveyard, dir); + + if (PTR_ERR(grave) == -ENOMEM) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + cachefiles_io_error(cache, "Lookup error %ld", + PTR_ERR(grave)); + return -EIO; + } + + if (d_is_positive(grave)) { + unlock_rename(cache->graveyard, dir); + dput(grave); + grave = NULL; + cond_resched(); + goto try_again; + } + + if (d_mountpoint(grave)) { + unlock_rename(cache->graveyard, dir); + dput(grave); + cachefiles_io_error(cache, "Mountpoint in graveyard"); + return -EIO; + } + + /* target should not be an ancestor of source */ + if (trap == grave) { + unlock_rename(cache->graveyard, dir); + dput(grave); + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + + /* attempt the rename */ + path.mnt = cache->mnt; + path.dentry = dir; + path_to_graveyard.mnt = cache->mnt; + path_to_graveyard.dentry = cache->graveyard; + ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0); + if (ret < 0) { + cachefiles_io_error(cache, "Rename security error %d", ret); + } else { + ret = vfs_rename(d_inode(dir), rep, + d_inode(cache->graveyard), grave, NULL, 0); + if (ret != 0 && ret != -ENOMEM) + cachefiles_io_error(cache, + "Rename failed with error %d", ret); + + if (preemptive) + cachefiles_mark_object_buried(cache, rep); + } + + unlock_rename(cache->graveyard, dir); + dput(grave); + _leave(" = 0"); + return 0; +} + +/* + * delete an object representation from the cache + */ +int cachefiles_delete_object(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct dentry *dir; + int ret; + + _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry); + + ASSERT(object->dentry); + ASSERT(d_backing_inode(object->dentry)); + ASSERT(object->dentry->d_parent); + + dir = dget_parent(object->dentry); + + mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); + + if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { + /* object allocation for the same key preemptively deleted this + * object's file so that it could create its own file */ + _debug("object preemptively buried"); + mutex_unlock(&d_inode(dir)->i_mutex); + ret = 0; + } else { + /* we need to check that our parent is _still_ our parent - it + * may have been renamed */ + if (dir == object->dentry->d_parent) { + ret = cachefiles_bury_object(cache, dir, + object->dentry, false); + } else { + /* it got moved, presumably by cachefilesd culling it, + * so it's no longer in the key path and we can ignore + * it */ + mutex_unlock(&d_inode(dir)->i_mutex); + ret = 0; + } + } + + dput(dir); + _leave(" = %d", ret); + return ret; +} + +/* + * walk from the parent object to the child object through the backing + * filesystem, creating directories as we go + */ +int cachefiles_walk_to_object(struct cachefiles_object *parent, + struct cachefiles_object *object, + const char *key, + struct cachefiles_xattr *auxdata) +{ + struct cachefiles_cache *cache; + struct dentry *dir, *next = NULL; + struct path path; + unsigned long start; + const char *name; + int ret, nlen; + + _enter("OBJ%x{%p},OBJ%x,%s,", + parent->fscache.debug_id, parent->dentry, + object->fscache.debug_id, key); + + cache = container_of(parent->fscache.cache, + struct cachefiles_cache, cache); + path.mnt = cache->mnt; + + ASSERT(parent->dentry); + ASSERT(d_backing_inode(parent->dentry)); + + if (!(d_is_dir(parent->dentry))) { + // TODO: convert file to dir + _leave("looking up in none directory"); + return -ENOBUFS; + } + + dir = dget(parent->dentry); + +advance: + /* attempt to transit the first directory component */ + name = key; + nlen = strlen(key); + + /* key ends in a double NUL */ + key = key + nlen + 1; + if (!*key) + key = NULL; + +lookup_again: + /* search the current directory for the element name */ + _debug("lookup '%s'", name); + + mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); + + start = jiffies; + next = lookup_one_len(name, dir, nlen); + cachefiles_hist(cachefiles_lookup_histogram, start); + if (IS_ERR(next)) + goto lookup_error; + + _debug("next -> %p %s", next, d_backing_inode(next) ? "positive" : "negative"); + + if (!key) + object->new = !d_backing_inode(next); + + /* if this element of the path doesn't exist, then the lookup phase + * failed, and we can release any readers in the certain knowledge that + * there's nothing for them to actually read */ + if (d_is_negative(next)) + fscache_object_lookup_negative(&object->fscache); + + /* we need to create the object if it's negative */ + if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) { + /* index objects and intervening tree levels must be subdirs */ + if (d_is_negative(next)) { + ret = cachefiles_has_space(cache, 1, 0); + if (ret < 0) + goto create_error; + + path.dentry = dir; + ret = security_path_mkdir(&path, next, 0); + if (ret < 0) + goto create_error; + start = jiffies; + ret = vfs_mkdir(d_inode(dir), next, 0); + cachefiles_hist(cachefiles_mkdir_histogram, start); + if (ret < 0) + goto create_error; + + ASSERT(d_backing_inode(next)); + + _debug("mkdir -> %p{%p{ino=%lu}}", + next, d_backing_inode(next), d_backing_inode(next)->i_ino); + + } else if (!d_can_lookup(next)) { + pr_err("inode %lu is not a directory\n", + d_backing_inode(next)->i_ino); + ret = -ENOBUFS; + goto error; + } + + } else { + /* non-index objects start out life as files */ + if (d_is_negative(next)) { + ret = cachefiles_has_space(cache, 1, 0); + if (ret < 0) + goto create_error; + + path.dentry = dir; + ret = security_path_mknod(&path, next, S_IFREG, 0); + if (ret < 0) + goto create_error; + start = jiffies; + ret = vfs_create(d_inode(dir), next, S_IFREG, true); + cachefiles_hist(cachefiles_create_histogram, start); + if (ret < 0) + goto create_error; + + ASSERT(d_backing_inode(next)); + + _debug("create -> %p{%p{ino=%lu}}", + next, d_backing_inode(next), d_backing_inode(next)->i_ino); + + } else if (!d_can_lookup(next) && + !d_is_reg(next) + ) { + pr_err("inode %lu is not a file or directory\n", + d_backing_inode(next)->i_ino); + ret = -ENOBUFS; + goto error; + } + } + + /* process the next component */ + if (key) { + _debug("advance"); + mutex_unlock(&d_inode(dir)->i_mutex); + dput(dir); + dir = next; + next = NULL; + goto advance; + } + + /* we've found the object we were looking for */ + object->dentry = next; + + /* if we've found that the terminal object exists, then we need to + * check its attributes and delete it if it's out of date */ + if (!object->new) { + _debug("validate '%pd'", next); + + ret = cachefiles_check_object_xattr(object, auxdata); + if (ret == -ESTALE) { + /* delete the object (the deleter drops the directory + * mutex) */ + object->dentry = NULL; + + ret = cachefiles_bury_object(cache, dir, next, true); + dput(next); + next = NULL; + + if (ret < 0) + goto delete_error; + + _debug("redo lookup"); + goto lookup_again; + } + } + + /* note that we're now using this object */ + ret = cachefiles_mark_object_active(cache, object); + + mutex_unlock(&d_inode(dir)->i_mutex); + dput(dir); + dir = NULL; + + if (ret == -ETIMEDOUT) + goto mark_active_timed_out; + + _debug("=== OBTAINED_OBJECT ==="); + + if (object->new) { + /* attach data to a newly constructed terminal object */ + ret = cachefiles_set_object_xattr(object, auxdata); + if (ret < 0) + goto check_error; + } else { + /* always update the atime on an object we've just looked up + * (this is used to keep track of culling, and atimes are only + * updated by read, write and readdir but not lookup or + * open) */ + path.dentry = next; + touch_atime(&path); + } + + /* open a file interface onto a data file */ + if (object->type != FSCACHE_COOKIE_TYPE_INDEX) { + if (d_is_reg(object->dentry)) { + const struct address_space_operations *aops; + + ret = -EPERM; + aops = d_backing_inode(object->dentry)->i_mapping->a_ops; + if (!aops->bmap) + goto check_error; + + object->backer = object->dentry; + } else { + BUG(); // TODO: open file in data-class subdir + } + } + + object->new = 0; + fscache_obtained_object(&object->fscache); + + _leave(" = 0 [%lu]", d_backing_inode(object->dentry)->i_ino); + return 0; + +create_error: + _debug("create error %d", ret); + if (ret == -EIO) + cachefiles_io_error(cache, "Create/mkdir failed"); + goto error; + +mark_active_timed_out: + _debug("mark active timed out"); + goto release_dentry; + +check_error: + _debug("check error %d", ret); + write_lock(&cache->active_lock); + rb_erase(&object->active_node, &cache->active_nodes); + clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); + wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); + write_unlock(&cache->active_lock); +release_dentry: + dput(object->dentry); + object->dentry = NULL; + goto error_out; + +delete_error: + _debug("delete error %d", ret); + goto error_out2; + +lookup_error: + _debug("lookup error %ld", PTR_ERR(next)); + ret = PTR_ERR(next); + if (ret == -EIO) + cachefiles_io_error(cache, "Lookup failed"); + next = NULL; +error: + mutex_unlock(&d_inode(dir)->i_mutex); + dput(next); +error_out2: + dput(dir); +error_out: + _leave(" = error %d", -ret); + return ret; +} + +/* + * get a subdirectory + */ +struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *dirname) +{ + struct dentry *subdir; + unsigned long start; + struct path path; + int ret; + + _enter(",,%s", dirname); + + /* search the current directory for the element name */ + mutex_lock(&d_inode(dir)->i_mutex); + + start = jiffies; + subdir = lookup_one_len(dirname, dir, strlen(dirname)); + cachefiles_hist(cachefiles_lookup_histogram, start); + if (IS_ERR(subdir)) { + if (PTR_ERR(subdir) == -ENOMEM) + goto nomem_d_alloc; + goto lookup_error; + } + + _debug("subdir -> %p %s", + subdir, d_backing_inode(subdir) ? "positive" : "negative"); + + /* we need to create the subdir if it doesn't exist yet */ + if (d_is_negative(subdir)) { + ret = cachefiles_has_space(cache, 1, 0); + if (ret < 0) + goto mkdir_error; + + _debug("attempt mkdir"); + + path.mnt = cache->mnt; + path.dentry = dir; + ret = security_path_mkdir(&path, subdir, 0700); + if (ret < 0) + goto mkdir_error; + ret = vfs_mkdir(d_inode(dir), subdir, 0700); + if (ret < 0) + goto mkdir_error; + + ASSERT(d_backing_inode(subdir)); + + _debug("mkdir -> %p{%p{ino=%lu}}", + subdir, + d_backing_inode(subdir), + d_backing_inode(subdir)->i_ino); + } + + mutex_unlock(&d_inode(dir)->i_mutex); + + /* we need to make sure the subdir is a directory */ + ASSERT(d_backing_inode(subdir)); + + if (!d_can_lookup(subdir)) { + pr_err("%s is not a directory\n", dirname); + ret = -EIO; + goto check_error; + } + + ret = -EPERM; + if (!d_backing_inode(subdir)->i_op->setxattr || + !d_backing_inode(subdir)->i_op->getxattr || + !d_backing_inode(subdir)->i_op->lookup || + !d_backing_inode(subdir)->i_op->mkdir || + !d_backing_inode(subdir)->i_op->create || + (!d_backing_inode(subdir)->i_op->rename && + !d_backing_inode(subdir)->i_op->rename2) || + !d_backing_inode(subdir)->i_op->rmdir || + !d_backing_inode(subdir)->i_op->unlink) + goto check_error; + + _leave(" = [%lu]", d_backing_inode(subdir)->i_ino); + return subdir; + +check_error: + dput(subdir); + _leave(" = %d [check]", ret); + return ERR_PTR(ret); + +mkdir_error: + mutex_unlock(&d_inode(dir)->i_mutex); + dput(subdir); + pr_err("mkdir %s failed with error %d\n", dirname, ret); + return ERR_PTR(ret); + +lookup_error: + mutex_unlock(&d_inode(dir)->i_mutex); + ret = PTR_ERR(subdir); + pr_err("Lookup %s failed with error %d\n", dirname, ret); + return ERR_PTR(ret); + +nomem_d_alloc: + mutex_unlock(&d_inode(dir)->i_mutex); + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); +} + +/* + * find out if an object is in use or not + * - if finds object and it's not in use: + * - returns a pointer to the object and a reference on it + * - returns with the directory locked + */ +static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, + struct dentry *dir, + char *filename) +{ + struct cachefiles_object *object; + struct rb_node *_n; + struct dentry *victim; + unsigned long start; + int ret; + + //_enter(",%pd/,%s", + // dir, filename); + + /* look up the victim */ + mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); + + start = jiffies; + victim = lookup_one_len(filename, dir, strlen(filename)); + cachefiles_hist(cachefiles_lookup_histogram, start); + if (IS_ERR(victim)) + goto lookup_error; + + //_debug("victim -> %p %s", + // victim, d_backing_inode(victim) ? "positive" : "negative"); + + /* if the object is no longer there then we probably retired the object + * at the netfs's request whilst the cull was in progress + */ + if (d_is_negative(victim)) { + mutex_unlock(&d_inode(dir)->i_mutex); + dput(victim); + _leave(" = -ENOENT [absent]"); + return ERR_PTR(-ENOENT); + } + + /* check to see if we're using this object */ + read_lock(&cache->active_lock); + + _n = cache->active_nodes.rb_node; + + while (_n) { + object = rb_entry(_n, struct cachefiles_object, active_node); + + if (object->dentry > victim) + _n = _n->rb_left; + else if (object->dentry < victim) + _n = _n->rb_right; + else + goto object_in_use; + } + + read_unlock(&cache->active_lock); + + //_leave(" = %p", victim); + return victim; + +object_in_use: + read_unlock(&cache->active_lock); + mutex_unlock(&d_inode(dir)->i_mutex); + dput(victim); + //_leave(" = -EBUSY [in use]"); + return ERR_PTR(-EBUSY); + +lookup_error: + mutex_unlock(&d_inode(dir)->i_mutex); + ret = PTR_ERR(victim); + if (ret == -ENOENT) { + /* file or dir now absent - probably retired by netfs */ + _leave(" = -ESTALE [absent]"); + return ERR_PTR(-ESTALE); + } + + if (ret == -EIO) { + cachefiles_io_error(cache, "Lookup failed"); + } else if (ret != -ENOMEM) { + pr_err("Internal error: %d\n", ret); + ret = -EIO; + } + + _leave(" = %d", ret); + return ERR_PTR(ret); +} + +/* + * cull an object if it's not in use + * - called only by cache manager daemon + */ +int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, + char *filename) +{ + struct dentry *victim; + int ret; + + _enter(",%pd/,%s", dir, filename); + + victim = cachefiles_check_active(cache, dir, filename); + if (IS_ERR(victim)) + return PTR_ERR(victim); + + _debug("victim -> %p %s", + victim, d_backing_inode(victim) ? "positive" : "negative"); + + /* okay... the victim is not being used so we can cull it + * - start by marking it as stale + */ + _debug("victim is cullable"); + + ret = cachefiles_remove_object_xattr(cache, victim); + if (ret < 0) + goto error_unlock; + + /* actually remove the victim (drops the dir mutex) */ + _debug("bury"); + + ret = cachefiles_bury_object(cache, dir, victim, false); + if (ret < 0) + goto error; + + dput(victim); + _leave(" = 0"); + return 0; + +error_unlock: + mutex_unlock(&d_inode(dir)->i_mutex); +error: + dput(victim); + if (ret == -ENOENT) { + /* file or dir now absent - probably retired by netfs */ + _leave(" = -ESTALE [absent]"); + return -ESTALE; + } + + if (ret != -ENOMEM) { + pr_err("Internal error: %d\n", ret); + ret = -EIO; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * find out if an object is in use or not + * - called only by cache manager daemon + * - returns -EBUSY or 0 to indicate whether an object is in use or not + */ +int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, + char *filename) +{ + struct dentry *victim; + + //_enter(",%pd/,%s", + // dir, filename); + + victim = cachefiles_check_active(cache, dir, filename); + if (IS_ERR(victim)) + return PTR_ERR(victim); + + mutex_unlock(&d_inode(dir)->i_mutex); + dput(victim); + //_leave(" = 0"); + return 0; +} diff --git a/kernel/fs/cachefiles/proc.c b/kernel/fs/cachefiles/proc.c new file mode 100644 index 000000000..eccd33941 --- /dev/null +++ b/kernel/fs/cachefiles/proc.c @@ -0,0 +1,134 @@ +/* CacheFiles statistics + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include "internal.h" + +atomic_t cachefiles_lookup_histogram[HZ]; +atomic_t cachefiles_mkdir_histogram[HZ]; +atomic_t cachefiles_create_histogram[HZ]; + +/* + * display the latency histogram + */ +static int cachefiles_histogram_show(struct seq_file *m, void *v) +{ + unsigned long index; + unsigned x, y, z, t; + + switch ((unsigned long) v) { + case 1: + seq_puts(m, "JIFS SECS LOOKUPS MKDIRS CREATES\n"); + return 0; + case 2: + seq_puts(m, "===== ===== ========= ========= =========\n"); + return 0; + default: + index = (unsigned long) v - 3; + x = atomic_read(&cachefiles_lookup_histogram[index]); + y = atomic_read(&cachefiles_mkdir_histogram[index]); + z = atomic_read(&cachefiles_create_histogram[index]); + if (x == 0 && y == 0 && z == 0) + return 0; + + t = (index * 1000) / HZ; + + seq_printf(m, "%4lu 0.%03u %9u %9u %9u\n", index, t, x, y, z); + return 0; + } +} + +/* + * set up the iterator to start reading from the first line + */ +static void *cachefiles_histogram_start(struct seq_file *m, loff_t *_pos) +{ + if ((unsigned long long)*_pos >= HZ + 2) + return NULL; + if (*_pos == 0) + *_pos = 1; + return (void *)(unsigned long) *_pos; +} + +/* + * move to the next line + */ +static void *cachefiles_histogram_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return (unsigned long long)*pos > HZ + 2 ? + NULL : (void *)(unsigned long) *pos; +} + +/* + * clean up after reading + */ +static void cachefiles_histogram_stop(struct seq_file *m, void *v) +{ +} + +static const struct seq_operations cachefiles_histogram_ops = { + .start = cachefiles_histogram_start, + .stop = cachefiles_histogram_stop, + .next = cachefiles_histogram_next, + .show = cachefiles_histogram_show, +}; + +/* + * open "/proc/fs/cachefiles/XXX" which provide statistics summaries + */ +static int cachefiles_histogram_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &cachefiles_histogram_ops); +} + +static const struct file_operations cachefiles_histogram_fops = { + .owner = THIS_MODULE, + .open = cachefiles_histogram_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* + * initialise the /proc/fs/cachefiles/ directory + */ +int __init cachefiles_proc_init(void) +{ + _enter(""); + + if (!proc_mkdir("fs/cachefiles", NULL)) + goto error_dir; + + if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL, + &cachefiles_histogram_fops)) + goto error_histogram; + + _leave(" = 0"); + return 0; + +error_histogram: + remove_proc_entry("fs/cachefiles", NULL); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * clean up the /proc/fs/cachefiles/ directory + */ +void cachefiles_proc_cleanup(void) +{ + remove_proc_entry("fs/cachefiles/histogram", NULL); + remove_proc_entry("fs/cachefiles", NULL); +} diff --git a/kernel/fs/cachefiles/rdwr.c b/kernel/fs/cachefiles/rdwr.c new file mode 100644 index 000000000..3cbb0e834 --- /dev/null +++ b/kernel/fs/cachefiles/rdwr.c @@ -0,0 +1,967 @@ +/* Storage object read/write + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include "internal.h" + +/* + * detect wake up events generated by the unlocking of pages in which we're + * interested + * - we use this to detect read completion of backing pages + * - the caller holds the waitqueue lock + */ +static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, + int sync, void *_key) +{ + struct cachefiles_one_read *monitor = + container_of(wait, struct cachefiles_one_read, monitor); + struct cachefiles_object *object; + struct wait_bit_key *key = _key; + struct page *page = wait->private; + + ASSERT(key); + + _enter("{%lu},%u,%d,{%p,%u}", + monitor->netfs_page->index, mode, sync, + key->flags, key->bit_nr); + + if (key->flags != &page->flags || + key->bit_nr != PG_locked) + return 0; + + _debug("--- monitor %p %lx ---", page, page->flags); + + if (!PageUptodate(page) && !PageError(page)) { + /* unlocked, not uptodate and not erronous? */ + _debug("page probably truncated"); + } + + /* remove from the waitqueue */ + list_del(&wait->task_list); + + /* move onto the action list and queue for FS-Cache thread pool */ + ASSERT(monitor->op); + + object = container_of(monitor->op->op.object, + struct cachefiles_object, fscache); + + spin_lock(&object->work_lock); + list_add_tail(&monitor->op_link, &monitor->op->to_do); + spin_unlock(&object->work_lock); + + fscache_enqueue_retrieval(monitor->op); + return 0; +} + +/* + * handle a probably truncated page + * - check to see if the page is still relevant and reissue the read if + * possible + * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we + * must wait again and 0 if successful + */ +static int cachefiles_read_reissue(struct cachefiles_object *object, + struct cachefiles_one_read *monitor) +{ + struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping; + struct page *backpage = monitor->back_page, *backpage2; + int ret; + + _enter("{ino=%lx},{%lx,%lx}", + d_backing_inode(object->backer)->i_ino, + backpage->index, backpage->flags); + + /* skip if the page was truncated away completely */ + if (backpage->mapping != bmapping) { + _leave(" = -ENODATA [mapping]"); + return -ENODATA; + } + + backpage2 = find_get_page(bmapping, backpage->index); + if (!backpage2) { + _leave(" = -ENODATA [gone]"); + return -ENODATA; + } + + if (backpage != backpage2) { + put_page(backpage2); + _leave(" = -ENODATA [different]"); + return -ENODATA; + } + + /* the page is still there and we already have a ref on it, so we don't + * need a second */ + put_page(backpage2); + + INIT_LIST_HEAD(&monitor->op_link); + add_page_wait_queue(backpage, &monitor->monitor); + + if (trylock_page(backpage)) { + ret = -EIO; + if (PageError(backpage)) + goto unlock_discard; + ret = 0; + if (PageUptodate(backpage)) + goto unlock_discard; + + _debug("reissue read"); + ret = bmapping->a_ops->readpage(NULL, backpage); + if (ret < 0) + goto unlock_discard; + } + + /* but the page may have been read before the monitor was installed, so + * the monitor may miss the event - so we have to ensure that we do get + * one in such a case */ + if (trylock_page(backpage)) { + _debug("jumpstart %p {%lx}", backpage, backpage->flags); + unlock_page(backpage); + } + + /* it'll reappear on the todo list */ + _leave(" = -EINPROGRESS"); + return -EINPROGRESS; + +unlock_discard: + unlock_page(backpage); + spin_lock_irq(&object->work_lock); + list_del(&monitor->op_link); + spin_unlock_irq(&object->work_lock); + _leave(" = %d", ret); + return ret; +} + +/* + * copy data from backing pages to netfs pages to complete a read operation + * - driven by FS-Cache's thread pool + */ +static void cachefiles_read_copier(struct fscache_operation *_op) +{ + struct cachefiles_one_read *monitor; + struct cachefiles_object *object; + struct fscache_retrieval *op; + int error, max; + + op = container_of(_op, struct fscache_retrieval, op); + object = container_of(op->op.object, + struct cachefiles_object, fscache); + + _enter("{ino=%lu}", d_backing_inode(object->backer)->i_ino); + + max = 8; + spin_lock_irq(&object->work_lock); + + while (!list_empty(&op->to_do)) { + monitor = list_entry(op->to_do.next, + struct cachefiles_one_read, op_link); + list_del(&monitor->op_link); + + spin_unlock_irq(&object->work_lock); + + _debug("- copy {%lu}", monitor->back_page->index); + + recheck: + if (test_bit(FSCACHE_COOKIE_INVALIDATING, + &object->fscache.cookie->flags)) { + error = -ESTALE; + } else if (PageUptodate(monitor->back_page)) { + copy_highpage(monitor->netfs_page, monitor->back_page); + fscache_mark_page_cached(monitor->op, + monitor->netfs_page); + error = 0; + } else if (!PageError(monitor->back_page)) { + /* the page has probably been truncated */ + error = cachefiles_read_reissue(object, monitor); + if (error == -EINPROGRESS) + goto next; + goto recheck; + } else { + cachefiles_io_error_obj( + object, + "Readpage failed on backing file %lx", + (unsigned long) monitor->back_page->flags); + error = -EIO; + } + + page_cache_release(monitor->back_page); + + fscache_end_io(op, monitor->netfs_page, error); + page_cache_release(monitor->netfs_page); + fscache_retrieval_complete(op, 1); + fscache_put_retrieval(op); + kfree(monitor); + + next: + /* let the thread pool have some air occasionally */ + max--; + if (max < 0 || need_resched()) { + if (!list_empty(&op->to_do)) + fscache_enqueue_retrieval(op); + _leave(" [maxed out]"); + return; + } + + spin_lock_irq(&object->work_lock); + } + + spin_unlock_irq(&object->work_lock); + _leave(""); +} + +/* + * read the corresponding page to the given set from the backing file + * - an uncertain page is simply discarded, to be tried again another time + */ +static int cachefiles_read_backing_file_one(struct cachefiles_object *object, + struct fscache_retrieval *op, + struct page *netpage) +{ + struct cachefiles_one_read *monitor; + struct address_space *bmapping; + struct page *newpage, *backpage; + int ret; + + _enter(""); + + _debug("read back %p{%lu,%d}", + netpage, netpage->index, page_count(netpage)); + + monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); + if (!monitor) + goto nomem; + + monitor->netfs_page = netpage; + monitor->op = fscache_get_retrieval(op); + + init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter); + + /* attempt to get hold of the backing page */ + bmapping = d_backing_inode(object->backer)->i_mapping; + newpage = NULL; + + for (;;) { + backpage = find_get_page(bmapping, netpage->index); + if (backpage) + goto backing_page_already_present; + + if (!newpage) { + newpage = __page_cache_alloc(cachefiles_gfp | + __GFP_COLD); + if (!newpage) + goto nomem_monitor; + } + + ret = add_to_page_cache_lru(newpage, bmapping, + netpage->index, cachefiles_gfp); + if (ret == 0) + goto installed_new_backing_page; + if (ret != -EEXIST) + goto nomem_page; + } + + /* we've installed a new backing page, so now we need to start + * it reading */ +installed_new_backing_page: + _debug("- new %p", newpage); + + backpage = newpage; + newpage = NULL; + +read_backing_page: + ret = bmapping->a_ops->readpage(NULL, backpage); + if (ret < 0) + goto read_error; + + /* set the monitor to transfer the data across */ +monitor_backing_page: + _debug("- monitor add"); + + /* install the monitor */ + page_cache_get(monitor->netfs_page); + page_cache_get(backpage); + monitor->back_page = backpage; + monitor->monitor.private = backpage; + add_page_wait_queue(backpage, &monitor->monitor); + monitor = NULL; + + /* but the page may have been read before the monitor was installed, so + * the monitor may miss the event - so we have to ensure that we do get + * one in such a case */ + if (trylock_page(backpage)) { + _debug("jumpstart %p {%lx}", backpage, backpage->flags); + unlock_page(backpage); + } + goto success; + + /* if the backing page is already present, it can be in one of + * three states: read in progress, read failed or read okay */ +backing_page_already_present: + _debug("- present"); + + if (newpage) { + page_cache_release(newpage); + newpage = NULL; + } + + if (PageError(backpage)) + goto io_error; + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate; + + if (!trylock_page(backpage)) + goto monitor_backing_page; + _debug("read %p {%lx}", backpage, backpage->flags); + goto read_backing_page; + + /* the backing page is already up to date, attach the netfs + * page to the pagecache and LRU and copy the data across */ +backing_page_already_uptodate: + _debug("- uptodate"); + + fscache_mark_page_cached(op, netpage); + + copy_highpage(netpage, backpage); + fscache_end_io(op, netpage, 0); + fscache_retrieval_complete(op, 1); + +success: + _debug("success"); + ret = 0; + +out: + if (backpage) + page_cache_release(backpage); + if (monitor) { + fscache_put_retrieval(monitor->op); + kfree(monitor); + } + _leave(" = %d", ret); + return ret; + +read_error: + _debug("read error %d", ret); + if (ret == -ENOMEM) { + fscache_retrieval_complete(op, 1); + goto out; + } +io_error: + cachefiles_io_error_obj(object, "Page read error on backing file"); + fscache_retrieval_complete(op, 1); + ret = -ENOBUFS; + goto out; + +nomem_page: + page_cache_release(newpage); +nomem_monitor: + fscache_put_retrieval(monitor->op); + kfree(monitor); +nomem: + fscache_retrieval_complete(op, 1); + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * read a page from the cache or allocate a block in which to store it + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if no buffers can be made available + * - returns -ENOBUFS if page is beyond EOF + * - if the page is backed by a block in the cache: + * - a read will be started which will call the callback on completion + * - 0 will be returned + * - else if the page is unbacked: + * - the metadata will be retained + * - -ENODATA will be returned + */ +int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, + struct page *page, + gfp_t gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct inode *inode; + sector_t block0, block; + unsigned shift; + int ret; + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("{%p},{%lx},,,", object, page->index); + + if (!object->backer) + goto enobufs; + + inode = d_backing_inode(object->backer); + ASSERT(S_ISREG(inode->i_mode)); + ASSERT(inode->i_mapping->a_ops->bmap); + ASSERT(inode->i_mapping->a_ops->readpages); + + /* calculate the shift required to use bmap */ + if (inode->i_sb->s_blocksize > PAGE_SIZE) + goto enobufs; + + shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; + + op->op.flags &= FSCACHE_OP_KEEP_FLAGS; + op->op.flags |= FSCACHE_OP_ASYNC; + op->op.processor = cachefiles_read_copier; + + /* we assume the absence or presence of the first block is a good + * enough indication for the page as a whole + * - TODO: don't use bmap() for this as it is _not_ actually good + * enough for this as it doesn't indicate errors, but it's all we've + * got for the moment + */ + block0 = page->index; + block0 <<= shift; + + block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); + _debug("%llx -> %llx", + (unsigned long long) block0, + (unsigned long long) block); + + if (block) { + /* submit the apparently valid page to the backing fs to be + * read from disk */ + ret = cachefiles_read_backing_file_one(object, op, page); + } else if (cachefiles_has_space(cache, 0, 1) == 0) { + /* there's space in the cache we can use */ + fscache_mark_page_cached(op, page); + fscache_retrieval_complete(op, 1); + ret = -ENODATA; + } else { + goto enobufs; + } + + _leave(" = %d", ret); + return ret; + +enobufs: + fscache_retrieval_complete(op, 1); + _leave(" = -ENOBUFS"); + return -ENOBUFS; +} + +/* + * read the corresponding pages to the given set from the backing file + * - any uncertain pages are simply discarded, to be tried again another time + */ +static int cachefiles_read_backing_file(struct cachefiles_object *object, + struct fscache_retrieval *op, + struct list_head *list) +{ + struct cachefiles_one_read *monitor = NULL; + struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping; + struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; + int ret = 0; + + _enter(""); + + list_for_each_entry_safe(netpage, _n, list, lru) { + list_del(&netpage->lru); + + _debug("read back %p{%lu,%d}", + netpage, netpage->index, page_count(netpage)); + + if (!monitor) { + monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); + if (!monitor) + goto nomem; + + monitor->op = fscache_get_retrieval(op); + init_waitqueue_func_entry(&monitor->monitor, + cachefiles_read_waiter); + } + + for (;;) { + backpage = find_get_page(bmapping, netpage->index); + if (backpage) + goto backing_page_already_present; + + if (!newpage) { + newpage = __page_cache_alloc(cachefiles_gfp | + __GFP_COLD); + if (!newpage) + goto nomem; + } + + ret = add_to_page_cache_lru(newpage, bmapping, + netpage->index, + cachefiles_gfp); + if (ret == 0) + goto installed_new_backing_page; + if (ret != -EEXIST) + goto nomem; + } + + /* we've installed a new backing page, so now we need + * to start it reading */ + installed_new_backing_page: + _debug("- new %p", newpage); + + backpage = newpage; + newpage = NULL; + + reread_backing_page: + ret = bmapping->a_ops->readpage(NULL, backpage); + if (ret < 0) + goto read_error; + + /* add the netfs page to the pagecache and LRU, and set the + * monitor to transfer the data across */ + monitor_backing_page: + _debug("- monitor add"); + + ret = add_to_page_cache_lru(netpage, op->mapping, + netpage->index, cachefiles_gfp); + if (ret < 0) { + if (ret == -EEXIST) { + page_cache_release(netpage); + fscache_retrieval_complete(op, 1); + continue; + } + goto nomem; + } + + /* install a monitor */ + page_cache_get(netpage); + monitor->netfs_page = netpage; + + page_cache_get(backpage); + monitor->back_page = backpage; + monitor->monitor.private = backpage; + add_page_wait_queue(backpage, &monitor->monitor); + monitor = NULL; + + /* but the page may have been read before the monitor was + * installed, so the monitor may miss the event - so we have to + * ensure that we do get one in such a case */ + if (trylock_page(backpage)) { + _debug("2unlock %p {%lx}", backpage, backpage->flags); + unlock_page(backpage); + } + + page_cache_release(backpage); + backpage = NULL; + + page_cache_release(netpage); + netpage = NULL; + continue; + + /* if the backing page is already present, it can be in one of + * three states: read in progress, read failed or read okay */ + backing_page_already_present: + _debug("- present %p", backpage); + + if (PageError(backpage)) + goto io_error; + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate; + + _debug("- not ready %p{%lx}", backpage, backpage->flags); + + if (!trylock_page(backpage)) + goto monitor_backing_page; + + if (PageError(backpage)) { + _debug("error %lx", backpage->flags); + unlock_page(backpage); + goto io_error; + } + + if (PageUptodate(backpage)) + goto backing_page_already_uptodate_unlock; + + /* we've locked a page that's neither up to date nor erroneous, + * so we need to attempt to read it again */ + goto reread_backing_page; + + /* the backing page is already up to date, attach the netfs + * page to the pagecache and LRU and copy the data across */ + backing_page_already_uptodate_unlock: + _debug("uptodate %lx", backpage->flags); + unlock_page(backpage); + backing_page_already_uptodate: + _debug("- uptodate"); + + ret = add_to_page_cache_lru(netpage, op->mapping, + netpage->index, cachefiles_gfp); + if (ret < 0) { + if (ret == -EEXIST) { + page_cache_release(netpage); + fscache_retrieval_complete(op, 1); + continue; + } + goto nomem; + } + + copy_highpage(netpage, backpage); + + page_cache_release(backpage); + backpage = NULL; + + fscache_mark_page_cached(op, netpage); + + /* the netpage is unlocked and marked up to date here */ + fscache_end_io(op, netpage, 0); + page_cache_release(netpage); + netpage = NULL; + fscache_retrieval_complete(op, 1); + continue; + } + + netpage = NULL; + + _debug("out"); + +out: + /* tidy up */ + if (newpage) + page_cache_release(newpage); + if (netpage) + page_cache_release(netpage); + if (backpage) + page_cache_release(backpage); + if (monitor) { + fscache_put_retrieval(op); + kfree(monitor); + } + + list_for_each_entry_safe(netpage, _n, list, lru) { + list_del(&netpage->lru); + page_cache_release(netpage); + fscache_retrieval_complete(op, 1); + } + + _leave(" = %d", ret); + return ret; + +nomem: + _debug("nomem"); + ret = -ENOMEM; + goto record_page_complete; + +read_error: + _debug("read error %d", ret); + if (ret == -ENOMEM) + goto record_page_complete; +io_error: + cachefiles_io_error_obj(object, "Page read error on backing file"); + ret = -ENOBUFS; +record_page_complete: + fscache_retrieval_complete(op, 1); + goto out; +} + +/* + * read a list of pages from the cache or allocate blocks in which to store + * them + */ +int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, + struct list_head *pages, + unsigned *nr_pages, + gfp_t gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct list_head backpages; + struct pagevec pagevec; + struct inode *inode; + struct page *page, *_n; + unsigned shift, nrbackpages; + int ret, ret2, space; + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("{OBJ%x,%d},,%d,,", + object->fscache.debug_id, atomic_read(&op->op.usage), + *nr_pages); + + if (!object->backer) + goto all_enobufs; + + space = 1; + if (cachefiles_has_space(cache, 0, *nr_pages) < 0) + space = 0; + + inode = d_backing_inode(object->backer); + ASSERT(S_ISREG(inode->i_mode)); + ASSERT(inode->i_mapping->a_ops->bmap); + ASSERT(inode->i_mapping->a_ops->readpages); + + /* calculate the shift required to use bmap */ + if (inode->i_sb->s_blocksize > PAGE_SIZE) + goto all_enobufs; + + shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; + + pagevec_init(&pagevec, 0); + + op->op.flags &= FSCACHE_OP_KEEP_FLAGS; + op->op.flags |= FSCACHE_OP_ASYNC; + op->op.processor = cachefiles_read_copier; + + INIT_LIST_HEAD(&backpages); + nrbackpages = 0; + + ret = space ? -ENODATA : -ENOBUFS; + list_for_each_entry_safe(page, _n, pages, lru) { + sector_t block0, block; + + /* we assume the absence or presence of the first block is a + * good enough indication for the page as a whole + * - TODO: don't use bmap() for this as it is _not_ actually + * good enough for this as it doesn't indicate errors, but + * it's all we've got for the moment + */ + block0 = page->index; + block0 <<= shift; + + block = inode->i_mapping->a_ops->bmap(inode->i_mapping, + block0); + _debug("%llx -> %llx", + (unsigned long long) block0, + (unsigned long long) block); + + if (block) { + /* we have data - add it to the list to give to the + * backing fs */ + list_move(&page->lru, &backpages); + (*nr_pages)--; + nrbackpages++; + } else if (space && pagevec_add(&pagevec, page) == 0) { + fscache_mark_pages_cached(op, &pagevec); + fscache_retrieval_complete(op, 1); + ret = -ENODATA; + } else { + fscache_retrieval_complete(op, 1); + } + } + + if (pagevec_count(&pagevec) > 0) + fscache_mark_pages_cached(op, &pagevec); + + if (list_empty(pages)) + ret = 0; + + /* submit the apparently valid pages to the backing fs to be read from + * disk */ + if (nrbackpages > 0) { + ret2 = cachefiles_read_backing_file(object, op, &backpages); + if (ret2 == -ENOMEM || ret2 == -EINTR) + ret = ret2; + } + + _leave(" = %d [nr=%u%s]", + ret, *nr_pages, list_empty(pages) ? " empty" : ""); + return ret; + +all_enobufs: + fscache_retrieval_complete(op, *nr_pages); + return -ENOBUFS; +} + +/* + * allocate a block in the cache in which to store a page + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if no buffers can be made available + * - returns -ENOBUFS if page is beyond EOF + * - otherwise: + * - the metadata will be retained + * - 0 will be returned + */ +int cachefiles_allocate_page(struct fscache_retrieval *op, + struct page *page, + gfp_t gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + int ret; + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,{%lx},", object, page->index); + + ret = cachefiles_has_space(cache, 0, 1); + if (ret == 0) + fscache_mark_page_cached(op, page); + else + ret = -ENOBUFS; + + fscache_retrieval_complete(op, 1); + _leave(" = %d", ret); + return ret; +} + +/* + * allocate blocks in the cache in which to store a set of pages + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if some buffers couldn't be made available + * - returns -ENOBUFS if some pages are beyond EOF + * - otherwise: + * - -ENODATA will be returned + * - metadata will be retained for any page marked + */ +int cachefiles_allocate_pages(struct fscache_retrieval *op, + struct list_head *pages, + unsigned *nr_pages, + gfp_t gfp) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct pagevec pagevec; + struct page *page; + int ret; + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,,,%d,", object, *nr_pages); + + ret = cachefiles_has_space(cache, 0, *nr_pages); + if (ret == 0) { + pagevec_init(&pagevec, 0); + + list_for_each_entry(page, pages, lru) { + if (pagevec_add(&pagevec, page) == 0) + fscache_mark_pages_cached(op, &pagevec); + } + + if (pagevec_count(&pagevec) > 0) + fscache_mark_pages_cached(op, &pagevec); + ret = -ENODATA; + } else { + ret = -ENOBUFS; + } + + fscache_retrieval_complete(op, *nr_pages); + _leave(" = %d", ret); + return ret; +} + +/* + * request a page be stored in the cache + * - cache withdrawal is prevented by the caller + * - this request may be ignored if there's no cache block available, in which + * case -ENOBUFS will be returned + * - if the op is in progress, 0 will be returned + */ +int cachefiles_write_page(struct fscache_storage *op, struct page *page) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct file *file; + struct path path; + loff_t pos, eof; + size_t len; + void *data; + int ret; + + ASSERT(op != NULL); + ASSERT(page != NULL); + + object = container_of(op->op.object, + struct cachefiles_object, fscache); + + _enter("%p,%p{%lx},,,", object, page, page->index); + + if (!object->backer) { + _leave(" = -ENOBUFS"); + return -ENOBUFS; + } + + ASSERT(d_is_reg(object->backer)); + + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + /* write the page to the backing filesystem and let it store it in its + * own time */ + path.mnt = cache->mnt; + path.dentry = object->backer; + file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + } else { + pos = (loff_t) page->index << PAGE_SHIFT; + + /* we mustn't write more data than we have, so we have + * to beware of a partial page at EOF */ + eof = object->fscache.store_limit_l; + len = PAGE_SIZE; + if (eof & ~PAGE_MASK) { + ASSERTCMP(pos, <, eof); + if (eof - pos < PAGE_SIZE) { + _debug("cut short %llx to %llx", + pos, eof); + len = eof - pos; + ASSERTCMP(pos + len, ==, eof); + } + } + + data = kmap(page); + ret = __kernel_write(file, data, len, &pos); + kunmap(page); + if (ret != len) + ret = -EIO; + fput(file); + } + + if (ret < 0) { + if (ret == -EIO) + cachefiles_io_error_obj( + object, "Write page to backing file failed"); + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; +} + +/* + * detach a backing block from a page + * - cache withdrawal is prevented by the caller + */ +void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + + object = container_of(_object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + _enter("%p,{%lu}", object, page->index); + + spin_unlock(&object->fscache.cookie->lock); +} diff --git a/kernel/fs/cachefiles/security.c b/kernel/fs/cachefiles/security.c new file mode 100644 index 000000000..31bbc0528 --- /dev/null +++ b/kernel/fs/cachefiles/security.c @@ -0,0 +1,116 @@ +/* CacheFiles security management + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include "internal.h" + +/* + * determine the security context within which we access the cache from within + * the kernel + */ +int cachefiles_get_security_ID(struct cachefiles_cache *cache) +{ + struct cred *new; + int ret; + + _enter("{%s}", cache->secctx); + + new = prepare_kernel_cred(current); + if (!new) { + ret = -ENOMEM; + goto error; + } + + if (cache->secctx) { + ret = set_security_override_from_ctx(new, cache->secctx); + if (ret < 0) { + put_cred(new); + pr_err("Security denies permission to nominate security context: error %d\n", + ret); + goto error; + } + } + + cache->cache_cred = new; + ret = 0; +error: + _leave(" = %d", ret); + return ret; +} + +/* + * see if mkdir and create can be performed in the root directory + */ +static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, + struct dentry *root) +{ + int ret; + + ret = security_inode_mkdir(d_backing_inode(root), root, 0); + if (ret < 0) { + pr_err("Security denies permission to make dirs: error %d", + ret); + return ret; + } + + ret = security_inode_create(d_backing_inode(root), root, 0); + if (ret < 0) + pr_err("Security denies permission to create files: error %d", + ret); + + return ret; +} + +/* + * check the security details of the on-disk cache + * - must be called with security override in force + * - must return with a security override in force - even in the case of an + * error + */ +int cachefiles_determine_cache_security(struct cachefiles_cache *cache, + struct dentry *root, + const struct cred **_saved_cred) +{ + struct cred *new; + int ret; + + _enter(""); + + /* duplicate the cache creds for COW (the override is currently in + * force, so we can use prepare_creds() to do this) */ + new = prepare_creds(); + if (!new) + return -ENOMEM; + + cachefiles_end_secure(cache, *_saved_cred); + + /* use the cache root dir's security context as the basis with + * which create files */ + ret = set_create_files_as(new, d_backing_inode(root)); + if (ret < 0) { + abort_creds(new); + cachefiles_begin_secure(cache, _saved_cred); + _leave(" = %d [cfa]", ret); + return ret; + } + + put_cred(cache->cache_cred); + cache->cache_cred = new; + + cachefiles_begin_secure(cache, _saved_cred); + ret = cachefiles_check_cache_dir(cache, root); + + if (ret == -EOPNOTSUPP) + ret = 0; + _leave(" = %d", ret); + return ret; +} diff --git a/kernel/fs/cachefiles/xattr.c b/kernel/fs/cachefiles/xattr.c new file mode 100644 index 000000000..d31c1a72d --- /dev/null +++ b/kernel/fs/cachefiles/xattr.c @@ -0,0 +1,324 @@ +/* CacheFiles extended attribute management + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static const char cachefiles_xattr_cache[] = + XATTR_USER_PREFIX "CacheFiles.cache"; + +/* + * check the type label on an object + * - done using xattrs + */ +int cachefiles_check_object_type(struct cachefiles_object *object) +{ + struct dentry *dentry = object->dentry; + char type[3], xtype[3]; + int ret; + + ASSERT(dentry); + ASSERT(d_backing_inode(dentry)); + + if (!object->fscache.cookie) + strcpy(type, "C3"); + else + snprintf(type, 3, "%02x", object->fscache.cookie->def->type); + + _enter("%p{%s}", object, type); + + /* attempt to install a type label directly */ + ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2, + XATTR_CREATE); + if (ret == 0) { + _debug("SET"); /* we succeeded */ + goto error; + } + + if (ret != -EEXIST) { + pr_err("Can't set xattr on %pd [%lu] (err %d)\n", + dentry, d_backing_inode(dentry)->i_ino, + -ret); + goto error; + } + + /* read the current type label */ + ret = vfs_getxattr(dentry, cachefiles_xattr_cache, xtype, 3); + if (ret < 0) { + if (ret == -ERANGE) + goto bad_type_length; + + pr_err("Can't read xattr on %pd [%lu] (err %d)\n", + dentry, d_backing_inode(dentry)->i_ino, + -ret); + goto error; + } + + /* check the type is what we're expecting */ + if (ret != 2) + goto bad_type_length; + + if (xtype[0] != type[0] || xtype[1] != type[1]) + goto bad_type; + + ret = 0; + +error: + _leave(" = %d", ret); + return ret; + +bad_type_length: + pr_err("Cache object %lu type xattr length incorrect\n", + d_backing_inode(dentry)->i_ino); + ret = -EIO; + goto error; + +bad_type: + xtype[2] = 0; + pr_err("Cache object %pd [%lu] type %s not %s\n", + dentry, d_backing_inode(dentry)->i_ino, + xtype, type); + ret = -EIO; + goto error; +} + +/* + * set the state xattr on a cache file + */ +int cachefiles_set_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct dentry *dentry = object->dentry; + int ret; + + ASSERT(dentry); + + _enter("%p,#%d", object, auxdata->len); + + /* attempt to install the cache metadata directly */ + _debug("SET #%u", auxdata->len); + + ret = vfs_setxattr(dentry, cachefiles_xattr_cache, + &auxdata->type, auxdata->len, + XATTR_CREATE); + if (ret < 0 && ret != -ENOMEM) + cachefiles_io_error_obj( + object, + "Failed to set xattr with error %d", ret); + + _leave(" = %d", ret); + return ret; +} + +/* + * update the state xattr on a cache file + */ +int cachefiles_update_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct dentry *dentry = object->dentry; + int ret; + + ASSERT(dentry); + + _enter("%p,#%d", object, auxdata->len); + + /* attempt to install the cache metadata directly */ + _debug("SET #%u", auxdata->len); + + ret = vfs_setxattr(dentry, cachefiles_xattr_cache, + &auxdata->type, auxdata->len, + XATTR_REPLACE); + if (ret < 0 && ret != -ENOMEM) + cachefiles_io_error_obj( + object, + "Failed to update xattr with error %d", ret); + + _leave(" = %d", ret); + return ret; +} + +/* + * check the consistency between the backing cache and the FS-Cache cookie + */ +int cachefiles_check_auxdata(struct cachefiles_object *object) +{ + struct cachefiles_xattr *auxbuf; + enum fscache_checkaux validity; + struct dentry *dentry = object->dentry; + ssize_t xlen; + int ret; + + ASSERT(dentry); + ASSERT(d_backing_inode(dentry)); + ASSERT(object->fscache.cookie->def->check_aux); + + auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL); + if (!auxbuf) + return -ENOMEM; + + xlen = vfs_getxattr(dentry, cachefiles_xattr_cache, + &auxbuf->type, 512 + 1); + ret = -ESTALE; + if (xlen < 1 || + auxbuf->type != object->fscache.cookie->def->type) + goto error; + + xlen--; + validity = fscache_check_aux(&object->fscache, &auxbuf->data, xlen); + if (validity != FSCACHE_CHECKAUX_OKAY) + goto error; + + ret = 0; +error: + kfree(auxbuf); + return ret; +} + +/* + * check the state xattr on a cache file + * - return -ESTALE if the object should be deleted + */ +int cachefiles_check_object_xattr(struct cachefiles_object *object, + struct cachefiles_xattr *auxdata) +{ + struct cachefiles_xattr *auxbuf; + struct dentry *dentry = object->dentry; + int ret; + + _enter("%p,#%d", object, auxdata->len); + + ASSERT(dentry); + ASSERT(d_backing_inode(dentry)); + + auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, cachefiles_gfp); + if (!auxbuf) { + _leave(" = -ENOMEM"); + return -ENOMEM; + } + + /* read the current type label */ + ret = vfs_getxattr(dentry, cachefiles_xattr_cache, + &auxbuf->type, 512 + 1); + if (ret < 0) { + if (ret == -ENODATA) + goto stale; /* no attribute - power went off + * mid-cull? */ + + if (ret == -ERANGE) + goto bad_type_length; + + cachefiles_io_error_obj(object, + "Can't read xattr on %lu (err %d)", + d_backing_inode(dentry)->i_ino, -ret); + goto error; + } + + /* check the on-disk object */ + if (ret < 1) + goto bad_type_length; + + if (auxbuf->type != auxdata->type) + goto stale; + + auxbuf->len = ret; + + /* consult the netfs */ + if (object->fscache.cookie->def->check_aux) { + enum fscache_checkaux result; + unsigned int dlen; + + dlen = auxbuf->len - 1; + + _debug("checkaux %s #%u", + object->fscache.cookie->def->name, dlen); + + result = fscache_check_aux(&object->fscache, + &auxbuf->data, dlen); + + switch (result) { + /* entry okay as is */ + case FSCACHE_CHECKAUX_OKAY: + goto okay; + + /* entry requires update */ + case FSCACHE_CHECKAUX_NEEDS_UPDATE: + break; + + /* entry requires deletion */ + case FSCACHE_CHECKAUX_OBSOLETE: + goto stale; + + default: + BUG(); + } + + /* update the current label */ + ret = vfs_setxattr(dentry, cachefiles_xattr_cache, + &auxdata->type, auxdata->len, + XATTR_REPLACE); + if (ret < 0) { + cachefiles_io_error_obj(object, + "Can't update xattr on %lu" + " (error %d)", + d_backing_inode(dentry)->i_ino, -ret); + goto error; + } + } + +okay: + ret = 0; + +error: + kfree(auxbuf); + _leave(" = %d", ret); + return ret; + +bad_type_length: + pr_err("Cache object %lu xattr length incorrect\n", + d_backing_inode(dentry)->i_ino); + ret = -EIO; + goto error; + +stale: + ret = -ESTALE; + goto error; +} + +/* + * remove the object's xattr to mark it stale + */ +int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct dentry *dentry) +{ + int ret; + + ret = vfs_removexattr(dentry, cachefiles_xattr_cache); + if (ret < 0) { + if (ret == -ENOENT || ret == -ENODATA) + ret = 0; + else if (ret != -ENOMEM) + cachefiles_io_error(cache, + "Can't remove xattr from %lu" + " (error %d)", + d_backing_inode(dentry)->i_ino, -ret); + } + + _leave(" = %d", ret); + return ret; +} -- cgit 1.2.3-korg