From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/fs/notify/dnotify/dnotify.c | 14 ++-- kernel/fs/notify/fanotify/fanotify_user.c | 8 ++- kernel/fs/notify/fdinfo.c | 12 +++- kernel/fs/notify/fsnotify.c | 11 ++- kernel/fs/notify/fsnotify.h | 21 ++++-- kernel/fs/notify/inode_mark.c | 40 +++-------- kernel/fs/notify/inotify/inotify_user.c | 18 ++++- kernel/fs/notify/mark.c | 113 +++++++++++++++++------------- kernel/fs/notify/vfsmount_mark.c | 19 ----- 9 files changed, 141 insertions(+), 115 deletions(-) (limited to 'kernel/fs/notify') diff --git a/kernel/fs/notify/dnotify/dnotify.c b/kernel/fs/notify/dnotify/dnotify.c index 44523f4a6..6faaf710e 100644 --- a/kernel/fs/notify/dnotify/dnotify.c +++ b/kernel/fs/notify/dnotify/dnotify.c @@ -154,6 +154,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) struct dnotify_struct *dn; struct dnotify_struct **prev; struct inode *inode; + bool free = false; inode = file_inode(filp); if (!S_ISDIR(inode->i_mode)) @@ -182,11 +183,15 @@ void dnotify_flush(struct file *filp, fl_owner_t id) /* nothing else could have found us thanks to the dnotify_groups mark_mutex */ - if (dn_mark->dn == NULL) - fsnotify_destroy_mark_locked(fsn_mark, dnotify_group); + if (dn_mark->dn == NULL) { + fsnotify_detach_mark(fsn_mark); + free = true; + } mutex_unlock(&dnotify_group->mark_mutex); + if (free) + fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); } @@ -362,9 +367,10 @@ out: spin_unlock(&fsn_mark->lock); if (destroy) - fsnotify_destroy_mark_locked(fsn_mark, dnotify_group); - + fsnotify_detach_mark(fsn_mark); mutex_unlock(&dnotify_group->mark_mutex); + if (destroy) + fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); out_err: if (new_fsn_mark) diff --git a/kernel/fs/notify/fanotify/fanotify_user.c b/kernel/fs/notify/fanotify/fanotify_user.c index cf275500a..8e8e6bcd1 100644 --- a/kernel/fs/notify/fanotify/fanotify_user.c +++ b/kernel/fs/notify/fanotify/fanotify_user.c @@ -529,8 +529,10 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); if (destroy_mark) - fsnotify_destroy_mark_locked(fsn_mark, group); + fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); + if (destroy_mark) + fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); if (removed & real_mount(mnt)->mnt_fsnotify_mask) @@ -557,8 +559,10 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); if (destroy_mark) - fsnotify_destroy_mark_locked(fsn_mark, group); + fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); + if (destroy_mark) + fsnotify_free_mark(fsn_mark); /* matches the fsnotify_find_inode_mark() */ fsnotify_put_mark(fsn_mark); diff --git a/kernel/fs/notify/fdinfo.c b/kernel/fs/notify/fdinfo.c index 58b7cdb63..fd98e5100 100644 --- a/kernel/fs/notify/fdinfo.c +++ b/kernel/fs/notify/fdinfo.c @@ -76,15 +76,23 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) struct inotify_inode_mark *inode_mark; struct inode *inode; - if (!(mark->flags & (FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_INODE))) + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) || + !(mark->flags & FSNOTIFY_MARK_FLAG_INODE)) return; inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); inode = igrab(mark->inode); if (inode) { + /* + * IN_ALL_EVENTS represents all of the mask bits + * that we expose to userspace. There is at + * least one bit (FS_EVENT_ON_CHILD) which is + * used only internally to the kernel. + */ + u32 mask = mark->mask & IN_ALL_EVENTS; seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ", inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, - mark->mask, mark->ignored_mask); + mask, mark->ignored_mask); show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); diff --git a/kernel/fs/notify/fsnotify.c b/kernel/fs/notify/fsnotify.c index dd3fb0b17..db39de2dd 100644 --- a/kernel/fs/notify/fsnotify.c +++ b/kernel/fs/notify/fsnotify.c @@ -26,7 +26,6 @@ #include #include "fsnotify.h" -#include "../mount.h" /* * Clear all of the marks on an inode when it is being evicted from core @@ -204,6 +203,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, else mnt = NULL; + /* + * Optimization: srcu_read_lock() has a memory barrier which can + * be expensive. It protects walking the *_fsnotify_marks lists. + * However, if we do not walk the lists, we do not have to do + * SRCU because we have no references to any objects and do not + * need SRCU to keep them "alive". + */ + if (hlist_empty(&to_tell->i_fsnotify_marks) && + (!mnt || hlist_empty(&mnt->mnt_fsnotify_marks))) + return 0; /* * if this is a modify event we may need to clear the ignored masks * otherwise return if neither the inode nor the vfsmount care about diff --git a/kernel/fs/notify/fsnotify.h b/kernel/fs/notify/fsnotify.h index 13a00be51..b44c68a85 100644 --- a/kernel/fs/notify/fsnotify.h +++ b/kernel/fs/notify/fsnotify.h @@ -6,6 +6,8 @@ #include #include +#include "../mount.h" + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -38,15 +40,22 @@ extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); /* inode specific destruction of a mark */ extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark); -/* Destroy all marks in the given list */ -extern void fsnotify_destroy_marks(struct list_head *to_free); /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, struct fsnotify_group *group); -/* run the list of all marks associated with inode and flag them to be freed */ -extern void fsnotify_clear_marks_by_inode(struct inode *inode); -/* run the list of all marks associated with vfsmount and flag them to be freed */ -extern void fsnotify_clear_marks_by_mount(struct vfsmount *mnt); +/* Destroy all marks in the given list protected by 'lock' */ +extern void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock); +/* run the list of all marks associated with inode and destroy them */ +static inline void fsnotify_clear_marks_by_inode(struct inode *inode) +{ + fsnotify_destroy_marks(&inode->i_fsnotify_marks, &inode->i_lock); +} +/* run the list of all marks associated with vfsmount and destroy them */ +static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) +{ + fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks, + &mnt->mnt_root->d_lock); +} /* * update the dentry->d_flags of all of inode's children to indicate if inode cares * about events that happen to its children. diff --git a/kernel/fs/notify/inode_mark.c b/kernel/fs/notify/inode_mark.c index 3daf513ee..e785fd954 100644 --- a/kernel/fs/notify/inode_mark.c +++ b/kernel/fs/notify/inode_mark.c @@ -64,26 +64,6 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) spin_unlock(&inode->i_lock); } -/* - * Given an inode, destroy all of the marks associated with that inode. - */ -void fsnotify_clear_marks_by_inode(struct inode *inode) -{ - struct fsnotify_mark *mark; - struct hlist_node *n; - LIST_HEAD(free_list); - - spin_lock(&inode->i_lock); - hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, obj_list) { - list_add(&mark->free_list, &free_list); - hlist_del_init_rcu(&mark->obj_list); - fsnotify_get_mark(mark); - } - spin_unlock(&inode->i_lock); - - fsnotify_destroy_marks(&free_list); -} - /* * Given a group clear all of the inode marks associated with that group. */ @@ -163,17 +143,17 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, /** * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. - * @list: list of inodes being unmounted (sb->s_inodes) + * @sb: superblock being unmounted. * * Called during unmount with no locks held, so needs to be safe against - * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block. + * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. */ -void fsnotify_unmount_inodes(struct list_head *list) +void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *next_i, *need_iput = NULL; - spin_lock(&inode_sb_list_lock); - list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) { struct inode *need_iput_tmp; /* @@ -209,7 +189,7 @@ void fsnotify_unmount_inodes(struct list_head *list) spin_unlock(&inode->i_lock); /* In case the dropping of a reference would nuke next_i. */ - while (&next_i->i_sb_list != list) { + while (&next_i->i_sb_list != &sb->s_inodes) { spin_lock(&next_i->i_lock); if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && atomic_read(&next_i->i_count)) { @@ -224,12 +204,12 @@ void fsnotify_unmount_inodes(struct list_head *list) } /* - * We can safely drop inode_sb_list_lock here because either + * We can safely drop s_inode_list_lock here because either * we actually hold references on both inode and next_i or * end of list. Also no new inodes will be added since the * umount has begun. */ - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); if (need_iput_tmp) iput(need_iput_tmp); @@ -241,7 +221,7 @@ void fsnotify_unmount_inodes(struct list_head *list) iput(inode); - spin_lock(&inode_sb_list_lock); + spin_lock(&sb->s_inode_list_lock); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&sb->s_inode_list_lock); } diff --git a/kernel/fs/notify/inotify/inotify_user.c b/kernel/fs/notify/inotify/inotify_user.c index 450648697..b8d08d0d0 100644 --- a/kernel/fs/notify/inotify/inotify_user.c +++ b/kernel/fs/notify/inotify/inotify_user.c @@ -26,7 +26,7 @@ #include /* struct inode */ #include #include -#include /* module_init */ +#include /* fs_initcall */ #include #include /* roundup() */ #include /* LOOKUP_FOLLOW */ @@ -706,7 +706,19 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, int ret; unsigned flags = 0; - /* don't allow invalid bits: we don't want flags set */ + /* + * We share a lot of code with fs/dnotify. We also share + * the bit layout between inotify's IN_* and the fsnotify + * FS_*. This check ensures that only the inotify IN_* + * bits get passed in and set in watches/events. + */ + if (unlikely(mask & ~ALL_INOTIFY_BITS)) + return -EINVAL; + /* + * Require at least one valid bit set in the mask. + * Without _something_ set, we would have no events to + * watch for. + */ if (unlikely(!(mask & ALL_INOTIFY_BITS))) return -EINVAL; @@ -812,4 +824,4 @@ static int __init inotify_user_setup(void) return 0; } -module_init(inotify_user_setup); +fs_initcall(inotify_user_setup); diff --git a/kernel/fs/notify/mark.c b/kernel/fs/notify/mark.c index 39ddcaf09..fc0df4442 100644 --- a/kernel/fs/notify/mark.c +++ b/kernel/fs/notify/mark.c @@ -122,26 +122,27 @@ u32 fsnotify_recalc_mask(struct hlist_head *head) } /* - * Any time a mark is getting freed we end up here. - * The caller had better be holding a reference to this mark so we don't actually - * do the final put under the mark->lock + * Remove mark from inode / vfsmount list, group list, drop inode reference + * if we got one. + * + * Must be called with group->mark_mutex held. */ -void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, - struct fsnotify_group *group) +void fsnotify_detach_mark(struct fsnotify_mark *mark) { struct inode *inode = NULL; + struct fsnotify_group *group = mark->group; BUG_ON(!mutex_is_locked(&group->mark_mutex)); spin_lock(&mark->lock); /* something else already called this function on this mark */ - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { spin_unlock(&mark->lock); return; } - mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; + mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { inode = mark->inode; @@ -150,6 +151,12 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, fsnotify_destroy_vfsmount_mark(mark); else BUG(); + /* + * Note that we didn't update flags telling whether inode cares about + * what's happening with children. We update these flags from + * __fsnotify_parent() lazily when next event happens on one of our + * children. + */ list_del_init(&mark->g_list); @@ -157,18 +164,32 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) iput(inode); - /* release lock temporarily */ - mutex_unlock(&group->mark_mutex); + + atomic_dec(&group->num_marks); +} + +/* + * Free fsnotify mark. The freeing is actually happening from a kthread which + * first waits for srcu period end. Caller must have a reference to the mark + * or be protected by fsnotify_mark_srcu. + */ +void fsnotify_free_mark(struct fsnotify_mark *mark) +{ + struct fsnotify_group *group = mark->group; + + spin_lock(&mark->lock); + /* something else already called this function on this mark */ + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { + spin_unlock(&mark->lock); + return; + } + mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; + spin_unlock(&mark->lock); spin_lock(&destroy_lock); list_add(&mark->g_list, &destroy_list); spin_unlock(&destroy_lock); wake_up(&destroy_waitq); - /* - * We don't necessarily have a ref on mark from caller so the above destroy - * may have actually freed it, unless this group provides a 'freeing_mark' - * function which must be holding a reference. - */ /* * Some groups like to know that marks are being freed. This is a @@ -177,50 +198,45 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, */ if (group->ops->freeing_mark) group->ops->freeing_mark(mark, group); - - /* - * __fsnotify_update_child_dentry_flags(inode); - * - * I really want to call that, but we can't, we have no idea if the inode - * still exists the second we drop the mark->lock. - * - * The next time an event arrive to this inode from one of it's children - * __fsnotify_parent will see that the inode doesn't care about it's - * children and will update all of these flags then. So really this - * is just a lazy update (and could be a perf win...) - */ - - atomic_dec(&group->num_marks); - - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); } void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group) { mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); - fsnotify_destroy_mark_locked(mark, group); + fsnotify_detach_mark(mark); mutex_unlock(&group->mark_mutex); + fsnotify_free_mark(mark); } -/* - * Destroy all marks in the given list. The marks must be already detached from - * the original inode / vfsmount. - */ -void fsnotify_destroy_marks(struct list_head *to_free) +void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock) { - struct fsnotify_mark *mark, *lmark; - struct fsnotify_group *group; - - list_for_each_entry_safe(mark, lmark, to_free, free_list) { - spin_lock(&mark->lock); - fsnotify_get_group(mark->group); - group = mark->group; - spin_unlock(&mark->lock); + struct fsnotify_mark *mark; - fsnotify_destroy_mark(mark, group); + while (1) { + /* + * We have to be careful since we can race with e.g. + * fsnotify_clear_marks_by_group() and once we drop 'lock', + * mark can get removed from the obj_list and destroyed. But + * we are holding mark reference so mark cannot be freed and + * calling fsnotify_destroy_mark() more than once is fine. + */ + spin_lock(lock); + if (hlist_empty(head)) { + spin_unlock(lock); + break; + } + mark = hlist_entry(head->first, struct fsnotify_mark, obj_list); + /* + * We don't update i_fsnotify_mask / mnt_fsnotify_mask here + * since inode / mount is going away anyway. So just remove + * mark from the list. + */ + hlist_del_init_rcu(&mark->obj_list); + fsnotify_get_mark(mark); + spin_unlock(lock); + fsnotify_destroy_mark(mark, mark->group); fsnotify_put_mark(mark); - fsnotify_put_group(group); } } @@ -332,7 +348,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, * inode->i_lock */ spin_lock(&mark->lock); - mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE; + mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED; fsnotify_get_group(group); mark->group = group; @@ -438,8 +454,9 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, } mark = list_first_entry(&to_free, struct fsnotify_mark, g_list); fsnotify_get_mark(mark); - fsnotify_destroy_mark_locked(mark, group); + fsnotify_detach_mark(mark); mutex_unlock(&group->mark_mutex); + fsnotify_free_mark(mark); fsnotify_put_mark(mark); } } diff --git a/kernel/fs/notify/vfsmount_mark.c b/kernel/fs/notify/vfsmount_mark.c index 326b148e6..a8fcab68f 100644 --- a/kernel/fs/notify/vfsmount_mark.c +++ b/kernel/fs/notify/vfsmount_mark.c @@ -28,25 +28,6 @@ #include #include "fsnotify.h" -#include "../mount.h" - -void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) -{ - struct fsnotify_mark *mark; - struct hlist_node *n; - struct mount *m = real_mount(mnt); - LIST_HEAD(free_list); - - spin_lock(&mnt->mnt_root->d_lock); - hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, obj_list) { - list_add(&mark->free_list, &free_list); - hlist_del_init_rcu(&mark->obj_list); - fsnotify_get_mark(mark); - } - spin_unlock(&mnt->mnt_root->d_lock); - - fsnotify_destroy_marks(&free_list); -} void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) { -- cgit 1.2.3-korg