summaryrefslogtreecommitdiffstats
path: root/kernel/fs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fs/inode.c')
-rw-r--r--kernel/fs/inode.c145
1 files changed, 99 insertions, 46 deletions
diff --git a/kernel/fs/inode.c b/kernel/fs/inode.c
index 6e342cade..1be5f9003 100644
--- a/kernel/fs/inode.c
+++ b/kernel/fs/inode.c
@@ -28,16 +28,16 @@
* inode->i_state, inode->i_hash, __iget()
* Inode LRU list locks protect:
* inode->i_sb->s_inode_lru, inode->i_lru
- * inode_sb_list_lock protects:
- * sb->s_inodes, inode->i_sb_list
+ * inode->i_sb->s_inode_list_lock protects:
+ * inode->i_sb->s_inodes, inode->i_sb_list
* bdi->wb.list_lock protects:
- * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
+ * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
* inode_hash_lock protects:
* inode_hashtable, inode->i_hash
*
* Lock ordering:
*
- * inode_sb_list_lock
+ * inode->i_sb->s_inode_list_lock
* inode->i_lock
* Inode LRU list locks
*
@@ -45,7 +45,7 @@
* inode->i_lock
*
* inode_hash_lock
- * inode_sb_list_lock
+ * inode->i_sb->s_inode_list_lock
* inode->i_lock
*
* iunique_lock
@@ -57,8 +57,6 @@ static unsigned int i_hash_shift __read_mostly;
static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
-
/*
* Empty aops. Can be used for the cases where the user does not
* define any of the address_space operations.
@@ -152,6 +150,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_pipe = NULL;
inode->i_bdev = NULL;
inode->i_cdev = NULL;
+ inode->i_link = NULL;
inode->i_rdev = 0;
inode->dirtied_when = 0;
@@ -223,6 +222,7 @@ EXPORT_SYMBOL(free_inode_nonrcu);
void __destroy_inode(struct inode *inode)
{
BUG_ON(inode_has_buffers(inode));
+ inode_detach_wb(inode);
security_inode_free(inode);
fsnotify_inode_delete(inode);
locks_free_lock_context(inode->i_flctx);
@@ -357,7 +357,7 @@ void inode_init_once(struct inode *inode)
memset(inode, 0, sizeof(*inode));
INIT_HLIST_NODE(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_devices);
- INIT_LIST_HEAD(&inode->i_wb_list);
+ INIT_LIST_HEAD(&inode->i_io_list);
INIT_LIST_HEAD(&inode->i_lru);
address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
@@ -424,18 +424,18 @@ static void inode_lru_list_del(struct inode *inode)
*/
void inode_sb_list_add(struct inode *inode)
{
- spin_lock(&inode_sb_list_lock);
+ spin_lock(&inode->i_sb->s_inode_list_lock);
list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&inode->i_sb->s_inode_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);
static inline void inode_sb_list_del(struct inode *inode)
{
if (!list_empty(&inode->i_sb_list)) {
- spin_lock(&inode_sb_list_lock);
+ spin_lock(&inode->i_sb->s_inode_list_lock);
list_del_init(&inode->i_sb_list);
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&inode->i_sb->s_inode_list_lock);
}
}
@@ -525,8 +525,8 @@ static void evict(struct inode *inode)
BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(!list_empty(&inode->i_lru));
- if (!list_empty(&inode->i_wb_list))
- inode_wb_list_del(inode);
+ if (!list_empty(&inode->i_io_list))
+ inode_io_list_del(inode);
inode_sb_list_del(inode);
@@ -575,6 +575,7 @@ static void dispose_list(struct list_head *head)
list_del_init(&inode->i_lru);
evict(inode);
+ cond_resched();
}
}
@@ -592,7 +593,8 @@ void evict_inodes(struct super_block *sb)
struct inode *inode, *next;
LIST_HEAD(dispose);
- spin_lock(&inode_sb_list_lock);
+again:
+ spin_lock(&sb->s_inode_list_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
if (atomic_read(&inode->i_count))
continue;
@@ -607,8 +609,20 @@ void evict_inodes(struct super_block *sb)
inode_lru_list_del(inode);
spin_unlock(&inode->i_lock);
list_add(&inode->i_lru, &dispose);
+
+ /*
+ * We can have a ton of inodes to evict at unmount time given
+ * enough memory, check to see if we need to go to sleep for a
+ * bit so we don't livelock.
+ */
+ if (need_resched()) {
+ spin_unlock(&sb->s_inode_list_lock);
+ cond_resched();
+ dispose_list(&dispose);
+ goto again;
+ }
}
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&sb->s_inode_list_lock);
dispose_list(&dispose);
}
@@ -629,7 +643,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
struct inode *inode, *next;
LIST_HEAD(dispose);
- spin_lock(&inode_sb_list_lock);
+ spin_lock(&sb->s_inode_list_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
@@ -652,7 +666,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
spin_unlock(&inode->i_lock);
list_add(&inode->i_lru, &dispose);
}
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&sb->s_inode_list_lock);
dispose_list(&dispose);
@@ -839,7 +853,11 @@ unsigned int get_next_ino(void)
}
#endif
- *p = ++res;
+ res++;
+ /* get_next_ino should not provide a 0 inode number */
+ if (unlikely(!res))
+ res++;
+ *p = res;
put_cpu_var(last_ino);
return res;
}
@@ -884,7 +902,7 @@ struct inode *new_inode(struct super_block *sb)
{
struct inode *inode;
- spin_lock_prefetch(&inode_sb_list_lock);
+ spin_lock_prefetch(&sb->s_inode_list_lock);
inode = new_inode_pseudo(sb);
if (inode)
@@ -1579,41 +1597,53 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
/**
* touch_atime - update the access time
* @path: the &struct path to update
+ * @inode: inode to update
*
* Update the accessed time on an inode and mark it for writeback.
* This function automatically handles read only file systems and media,
* as well as the "noatime" flag and inode specific "noatime" markers.
*/
-void touch_atime(const struct path *path)
+bool atime_needs_update(const struct path *path, struct inode *inode)
{
struct vfsmount *mnt = path->mnt;
- struct inode *inode = d_inode(path->dentry);
struct timespec now;
if (inode->i_flags & S_NOATIME)
- return;
+ return false;
if (IS_NOATIME(inode))
- return;
+ return false;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
- return;
+ return false;
if (mnt->mnt_flags & MNT_NOATIME)
- return;
+ return false;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
- return;
+ return false;
now = current_fs_time(inode->i_sb);
if (!relatime_need_update(mnt, inode, now))
- return;
+ return false;
if (timespec_equal(&inode->i_atime, &now))
+ return false;
+
+ return true;
+}
+
+void touch_atime(const struct path *path)
+{
+ struct vfsmount *mnt = path->mnt;
+ struct inode *inode = d_inode(path->dentry);
+ struct timespec now;
+
+ if (!atime_needs_update(path, inode))
return;
if (!sb_start_write_trylock(inode->i_sb))
return;
- if (__mnt_want_write(mnt))
+ if (__mnt_want_write(mnt) != 0)
goto skip_update;
/*
* File systems can error out when updating inodes if they need to
@@ -1624,6 +1654,7 @@ void touch_atime(const struct path *path)
* We may also fail on filesystems that have the ability to make parts
* of the fs read only, e.g. subvolumes in Btrfs.
*/
+ now = current_fs_time(inode->i_sb);
update_time(inode, &now, S_ATIME);
__mnt_drop_write(mnt);
skip_update:
@@ -1660,7 +1691,31 @@ int should_remove_suid(struct dentry *dentry)
}
EXPORT_SYMBOL(should_remove_suid);
-static int __remove_suid(struct dentry *dentry, int kill)
+/*
+ * Return mask of changes for notify_change() that need to be done as a
+ * response to write or truncate. Return 0 if nothing has to be changed.
+ * Negative value on error (change should be denied).
+ */
+int dentry_needs_remove_privs(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ int mask = 0;
+ int ret;
+
+ if (IS_NOSEC(inode))
+ return 0;
+
+ mask = should_remove_suid(dentry);
+ ret = security_inode_need_killpriv(dentry);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ mask |= ATTR_KILL_PRIV;
+ return mask;
+}
+EXPORT_SYMBOL(dentry_needs_remove_privs);
+
+static int __remove_privs(struct dentry *dentry, int kill)
{
struct iattr newattrs;
@@ -1672,33 +1727,32 @@ static int __remove_suid(struct dentry *dentry, int kill)
return notify_change(dentry, &newattrs, NULL);
}
-int file_remove_suid(struct file *file)
+/*
+ * Remove special file priviledges (suid, capabilities) when file is written
+ * to or truncated.
+ */
+int file_remove_privs(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = d_inode(dentry);
- int killsuid;
- int killpriv;
+ int kill;
int error = 0;
/* Fast path for nothing security related */
if (IS_NOSEC(inode))
return 0;
- killsuid = should_remove_suid(dentry);
- killpriv = security_inode_need_killpriv(dentry);
-
- if (killpriv < 0)
- return killpriv;
- if (killpriv)
- error = security_inode_killpriv(dentry);
- if (!error && killsuid)
- error = __remove_suid(dentry, killsuid);
+ kill = file_needs_remove_privs(file);
+ if (kill < 0)
+ return kill;
+ if (kill)
+ error = __remove_privs(dentry, kill);
if (!error)
inode_has_no_xattr(inode);
return error;
}
-EXPORT_SYMBOL(file_remove_suid);
+EXPORT_SYMBOL(file_remove_privs);
/**
* file_update_time - update mtime and ctime time
@@ -1953,9 +2007,8 @@ EXPORT_SYMBOL(inode_dio_wait);
* inode is being instantiated). The reason for the cmpxchg() loop
* --- which wouldn't be necessary if all code paths which modify
* i_flags actually followed this rule, is that there is at least one
- * code path which doesn't today --- for example,
- * __generic_file_aio_write() calls file_remove_suid() without holding
- * i_mutex --- so we use cmpxchg() out of an abundance of caution.
+ * code path which doesn't today so we use cmpxchg() out of an abundance
+ * of caution.
*
* In the long run, i_mutex is overkill, and we should probably look
* at using the i_lock spinlock to protect i_flags, and then make sure