From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/drivers/infiniband/core/ucma.c | 186 +++++++++++++++++++++++++++------- 1 file changed, 149 insertions(+), 37 deletions(-) (limited to 'kernel/drivers/infiniband/core/ucma.c') diff --git a/kernel/drivers/infiniband/core/ucma.c b/kernel/drivers/infiniband/core/ucma.c index 45d67e922..8b5a934e1 100644 --- a/kernel/drivers/infiniband/core/ucma.c +++ b/kernel/drivers/infiniband/core/ucma.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -74,6 +75,7 @@ struct ucma_file { struct list_head ctx_list; struct list_head event_list; wait_queue_head_t poll_wait; + struct workqueue_struct *close_wq; }; struct ucma_context { @@ -89,6 +91,13 @@ struct ucma_context { struct list_head list; struct list_head mc_list; + /* mark that device is in process of destroying the internal HW + * resources, protected by the global mut + */ + int closing; + /* sync between removal event and id destroy, protected by file mut */ + int destroying; + struct work_struct close_work; }; struct ucma_multicast { @@ -107,6 +116,7 @@ struct ucma_event { struct list_head list; struct rdma_cm_id *cm_id; struct rdma_ucm_event_resp resp; + struct work_struct close_work; }; static DEFINE_MUTEX(mut); @@ -132,8 +142,12 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) mutex_lock(&mut); ctx = _ucma_find_context(id, file); - if (!IS_ERR(ctx)) - atomic_inc(&ctx->ref); + if (!IS_ERR(ctx)) { + if (ctx->closing) + ctx = ERR_PTR(-EIO); + else + atomic_inc(&ctx->ref); + } mutex_unlock(&mut); return ctx; } @@ -144,6 +158,28 @@ static void ucma_put_ctx(struct ucma_context *ctx) complete(&ctx->comp); } +static void ucma_close_event_id(struct work_struct *work) +{ + struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work); + + rdma_destroy_id(uevent_close->cm_id); + kfree(uevent_close); +} + +static void ucma_close_id(struct work_struct *work) +{ + struct ucma_context *ctx = container_of(work, struct ucma_context, close_work); + + /* once all inflight tasks are finished, we close all underlying + * resources. The context is still alive till its explicit destryoing + * by its creator. + */ + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); + /* No new events will be generated after destroying the id. */ + rdma_destroy_id(ctx->cm_id); +} + static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) { struct ucma_context *ctx; @@ -152,6 +188,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) if (!ctx) return NULL; + INIT_WORK(&ctx->close_work, ucma_close_id); atomic_set(&ctx->ref, 1); init_completion(&ctx->comp); INIT_LIST_HEAD(&ctx->mc_list); @@ -242,6 +279,44 @@ static void ucma_set_event_context(struct ucma_context *ctx, } } +/* Called with file->mut locked for the relevant context. */ +static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) +{ + struct ucma_context *ctx = cm_id->context; + struct ucma_event *con_req_eve; + int event_found = 0; + + if (ctx->destroying) + return; + + /* only if context is pointing to cm_id that it owns it and can be + * queued to be closed, otherwise that cm_id is an inflight one that + * is part of that context event list pending to be detached and + * reattached to its new context as part of ucma_get_event, + * handled separately below. + */ + if (ctx->cm_id == cm_id) { + mutex_lock(&mut); + ctx->closing = 1; + mutex_unlock(&mut); + queue_work(ctx->file->close_wq, &ctx->close_work); + return; + } + + list_for_each_entry(con_req_eve, &ctx->file->event_list, list) { + if (con_req_eve->cm_id == cm_id && + con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { + list_del(&con_req_eve->list); + INIT_WORK(&con_req_eve->close_work, ucma_close_event_id); + queue_work(ctx->file->close_wq, &con_req_eve->close_work); + event_found = 1; + break; + } + } + if (!event_found) + printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n"); +} + static int ucma_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { @@ -276,14 +351,21 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, * We ignore events for new connections until userspace has set * their context. This can only happen if an error occurs on a * new connection before the user accepts it. This is okay, - * since the accept will just fail later. + * since the accept will just fail later. However, we do need + * to release the underlying HW resources in case of a device + * removal event. */ + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ucma_removal_event_handler(cm_id); + kfree(uevent); goto out; } list_add_tail(&uevent->list, &ctx->file->event_list); wake_up_interruptible(&ctx->file->poll_wait); + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ucma_removal_event_handler(cm_id); out: mutex_unlock(&ctx->file->mut); return ret; @@ -391,7 +473,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type); + ctx->cm_id = rdma_create_id(current->nsproxy->net_ns, + ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; @@ -442,9 +525,15 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) } /* - * We cannot hold file->mut when calling rdma_destroy_id() or we can - * deadlock. We also acquire file->mut in ucma_event_handler(), and - * rdma_destroy_id() will wait until all callbacks have completed. + * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At + * this point, no new events will be reported from the hardware. However, we + * still need to cleanup the UCMA context for this ID. Specifically, there + * might be events that have not yet been consumed by the user space software. + * These might include pending connect requests which we have not completed + * processing. We cannot call rdma_destroy_id while holding the lock of the + * context (file->mut), as it might cause a deadlock. We therefore extract all + * relevant events from the context pending events list while holding the + * mutex. After that we release them as needed. */ static int ucma_free_ctx(struct ucma_context *ctx) { @@ -452,8 +541,6 @@ static int ucma_free_ctx(struct ucma_context *ctx) struct ucma_event *uevent, *tmp; LIST_HEAD(list); - /* No new events will be generated after destroying the id. */ - rdma_destroy_id(ctx->cm_id); ucma_cleanup_multicast(ctx); @@ -501,10 +588,24 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - ucma_put_ctx(ctx); - wait_for_completion(&ctx->comp); - resp.events_reported = ucma_free_ctx(ctx); + mutex_lock(&ctx->file->mut); + ctx->destroying = 1; + mutex_unlock(&ctx->file->mut); + flush_workqueue(ctx->file->close_wq); + /* At this point it's guaranteed that there is no inflight + * closing task */ + mutex_lock(&mut); + if (!ctx->closing) { + mutex_unlock(&mut); + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); + rdma_destroy_id(ctx->cm_id); + } else { + mutex_unlock(&mut); + } + + resp.events_reported = ucma_free_ctx(ctx); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; @@ -722,26 +823,13 @@ static ssize_t ucma_query_route(struct ucma_file *file, resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; - switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(ctx->cm_id->device, - ctx->cm_id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ucma_copy_ib_route(&resp, &ctx->cm_id->route); - break; - case IB_LINK_LAYER_ETHERNET: - ucma_copy_iboe_route(&resp, &ctx->cm_id->route); - break; - default: - break; - } - break; - case RDMA_TRANSPORT_IWARP: + + if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_iboe_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iw_route(&resp, &ctx->cm_id->route); - break; - default: - break; - } out: if (copy_to_user((void __user *)(unsigned long)cmd.response, @@ -1125,7 +1213,6 @@ static int ucma_set_ib_path(struct ucma_context *ctx, return -EINVAL; memset(&sa_path, 0, sizeof(sa_path)); - sa_path.vlan_id = 0xffff; ib_sa_unpack_path(path_data->path_rec, &sa_path); ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); @@ -1334,10 +1421,10 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, mc = ERR_PTR(-ENOENT); else if (mc->ctx->file != file) mc = ERR_PTR(-EINVAL); - else { + else if (!atomic_inc_not_zero(&mc->ctx->ref)) + mc = ERR_PTR(-ENXIO); + else idr_remove(&multicast_idr, mc->id); - atomic_inc(&mc->ctx->ref); - } mutex_unlock(&mut); if (IS_ERR(mc)) { @@ -1367,10 +1454,10 @@ static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) /* Acquire mutex's based on pointer comparison to prevent deadlock. */ if (file1 < file2) { mutex_lock(&file1->mut); - mutex_lock(&file2->mut); + mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); } else { mutex_lock(&file2->mut); - mutex_lock(&file1->mut); + mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); } } @@ -1538,6 +1625,12 @@ static int ucma_open(struct inode *inode, struct file *filp) if (!file) return -ENOMEM; + file->close_wq = create_singlethread_workqueue("ucma_close_id"); + if (!file->close_wq) { + kfree(file); + return -ENOMEM; + } + INIT_LIST_HEAD(&file->event_list); INIT_LIST_HEAD(&file->ctx_list); init_waitqueue_head(&file->poll_wait); @@ -1556,16 +1649,34 @@ static int ucma_close(struct inode *inode, struct file *filp) mutex_lock(&file->mut); list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { + ctx->destroying = 1; mutex_unlock(&file->mut); mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); + flush_workqueue(file->close_wq); + /* At that step once ctx was marked as destroying and workqueue + * was flushed we are safe from any inflights handlers that + * might put other closing task. + */ + mutex_lock(&mut); + if (!ctx->closing) { + mutex_unlock(&mut); + /* rdma_destroy_id ensures that no event handlers are + * inflight for that id before releasing it. + */ + rdma_destroy_id(ctx->cm_id); + } else { + mutex_unlock(&mut); + } + ucma_free_ctx(ctx); mutex_lock(&file->mut); } mutex_unlock(&file->mut); + destroy_workqueue(file->close_wq); kfree(file); return 0; } @@ -1629,6 +1740,7 @@ static void __exit ucma_cleanup(void) device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); + idr_destroy(&multicast_idr); } module_init(ucma_init); -- cgit 1.2.3-korg